Merge pull request #3821 from janhq/feat/path-to-cortexcpp
feat: Jan Integrates Cortex.cpp as Provider
Commit a82c701087
@@ -319,6 +319,13 @@ jobs:
       # TURBO_TEAM: 'linux'
       # TURBO_TOKEN: '${{ secrets.TURBO_TOKEN }}'

+      - uses: actions/upload-artifact@v4
+        if: always()
+        with:
+          name: playwright-report
+          path: electron/playwright-report/
+          retention-days: 2
+
   coverage-check:
     runs-on: [self-hosted, Linux, ubuntu-desktop]
     needs: base_branch_cov
@@ -1 +1 @@
-npm run lint --fix
+npx oxlint@latest --fix
@@ -1,6 +1,8 @@
-import { SettingComponentProps } from '../types'
+import { Model, ModelEvent, SettingComponentProps } from '../types'
 import { getJanDataFolderPath, joinPath } from './core'
+import { events } from './events'
 import { fs } from './fs'
+import { ModelManager } from './models'

 export enum ExtensionTypeEnum {
   Assistant = 'assistant',
@@ -103,6 +105,22 @@ export abstract class BaseExtension implements ExtensionType {
     return undefined
   }

+  /**
+   * Registers models - it persists in-memory shared ModelManager instance's data map.
+   * @param models
+   */
+  async registerModels(models: Model[]): Promise<void> {
+    for (const model of models) {
+      ModelManager.instance().register(model)
+    }
+    events.emit(ModelEvent.OnModelsUpdate, {})
+  }
+
+  /**
+   * Register settings for the extension.
+   * @param settings
+   * @returns
+   */
   async registerSettings(settings: SettingComponentProps[]): Promise<void> {
     if (!this.name) {
       console.error('Extension name is not defined')
@@ -139,6 +157,12 @@ export abstract class BaseExtension implements ExtensionType {
     }
   }

+  /**
+   * Get the setting value for the key.
+   * @param key
+   * @param defaultValue
+   * @returns
+   */
   async getSetting<T>(key: string, defaultValue: T) {
     const keySetting = (await this.getSettings()).find((setting) => setting.key === key)

@@ -168,6 +192,10 @@ export abstract class BaseExtension implements ExtensionType {
     return
   }

+  /**
+   * Get the settings for the extension.
+   * @returns
+   */
   async getSettings(): Promise<SettingComponentProps[]> {
     if (!this.name) return []

@@ -189,6 +217,11 @@ export abstract class BaseExtension implements ExtensionType {
     }
   }

+  /**
+   * Update the settings for the extension.
+   * @param componentProps
+   * @returns
+   */
   async updateSettings(componentProps: Partial<SettingComponentProps>[]): Promise<void> {
     if (!this.name) return
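Note: the new registerModels hook replaces the per-extension model.json persistence (removed from AIEngine below) with the shared in-memory ModelManager. A minimal sketch of how a provider extension might call it; the subclass name and the fetchMyCatalog() helper are illustrative only, and the other abstract members of BaseExtension are omitted:

// Illustrative subclass; type(), onUnload(), real catalog data omitted
class MyProviderExtension extends BaseExtension {
  async onLoad() {
    const catalog: Model[] = await fetchMyCatalog() // hypothetical helper
    // Persists the entries into the shared ModelManager and emits one OnModelsUpdate event
    await this.registerModels(catalog)
  }
}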
@@ -1,8 +1,6 @@
 import { AIEngine } from './AIEngine'
 import { events } from '../../events'
-import { ModelEvent, Model, ModelFile, InferenceEngine } from '../../../types'
-import { EngineManager } from './EngineManager'
-import { fs } from '../../fs'
+import { ModelEvent, Model } from '../../../types'

 jest.mock('../../events')
 jest.mock('./EngineManager')
@@ -26,7 +24,7 @@ describe('AIEngine', () => {
   })

   it('should load model if provider matches', async () => {
-    const model: ModelFile = { id: 'model1', engine: 'test-provider' } as any
+    const model: any = { id: 'model1', engine: 'test-provider' } as any

     await engine.loadModel(model)

@@ -34,7 +32,7 @@ describe('AIEngine', () => {
   })

   it('should not load model if provider does not match', async () => {
-    const model: ModelFile = { id: 'model1', engine: 'other-provider' } as any
+    const model: any = { id: 'model1', engine: 'other-provider' } as any

     await engine.loadModel(model)
@@ -1,17 +1,14 @@
-import { getJanDataFolderPath, joinPath } from '../../core'
 import { events } from '../../events'
 import { BaseExtension } from '../../extension'
-import { fs } from '../../fs'
-import { MessageRequest, Model, ModelEvent, ModelFile } from '../../../types'
+import { MessageRequest, Model, ModelEvent } from '../../../types'
 import { EngineManager } from './EngineManager'
+import { ModelManager } from '../../models/manager'

 /**
  * Base AIEngine
  * Applicable to all AI Engines
  */
 export abstract class AIEngine extends BaseExtension {
-  private static modelsFolder = 'models'
-
   // The inference engine
   abstract provider: string

@@ -21,7 +18,7 @@ export abstract class AIEngine extends BaseExtension {
   override onLoad() {
     this.registerEngine()

-    events.on(ModelEvent.OnModelInit, (model: ModelFile) => this.loadModel(model))
+    events.on(ModelEvent.OnModelInit, (model: Model) => this.loadModel(model))
     events.on(ModelEvent.OnModelStop, (model: Model) => this.unloadModel(model))
   }

@@ -32,53 +29,10 @@ export abstract class AIEngine extends BaseExtension {
     EngineManager.instance().register(this)
   }

-  async registerModels(models: Model[]): Promise<void> {
-    const modelFolderPath = await joinPath([await getJanDataFolderPath(), AIEngine.modelsFolder])
-
-    let shouldNotifyModelUpdate = false
-    for (const model of models) {
-      const modelPath = await joinPath([modelFolderPath, model.id])
-      const isExist = await fs.existsSync(modelPath)
-
-      if (isExist) {
-        await this.migrateModelIfNeeded(model, modelPath)
-        continue
-      }
-
-      await fs.mkdir(modelPath)
-      await fs.writeFileSync(
-        await joinPath([modelPath, 'model.json']),
-        JSON.stringify(model, null, 2)
-      )
-      shouldNotifyModelUpdate = true
-    }
-
-    if (shouldNotifyModelUpdate) {
-      events.emit(ModelEvent.OnModelsUpdate, {})
-    }
-  }
-
-  async migrateModelIfNeeded(model: Model, modelPath: string): Promise<void> {
-    try {
-      const modelJson = await fs.readFileSync(await joinPath([modelPath, 'model.json']), 'utf-8')
-      const currentModel: Model = JSON.parse(modelJson)
-      if (currentModel.version !== model.version) {
-        await fs.writeFileSync(
-          await joinPath([modelPath, 'model.json']),
-          JSON.stringify(model, null, 2)
-        )
-
-        events.emit(ModelEvent.OnModelsUpdate, {})
-      }
-    } catch (error) {
-      console.warn('Error while try to migrating model', error)
-    }
-  }
-
   /**
    * Loads the model.
    */
-  async loadModel(model: ModelFile): Promise<any> {
+  async loadModel(model: Model): Promise<any> {
     if (model.engine.toString() !== this.provider) return Promise.resolve()
     events.emit(ModelEvent.OnModelReady, model)
     return Promise.resolve()
@@ -1,3 +1,4 @@
+import { InferenceEngine } from '../../../types'
 import { AIEngine } from './AIEngine'

 /**
@@ -20,6 +21,22 @@ export class EngineManager {
    * @returns The engine, if found.
    */
   get<T extends AIEngine>(provider: string): T | undefined {
+    // Backward compatible provider
+    // nitro is migrated to cortex
+    if (
+      [
+        InferenceEngine.nitro,
+        InferenceEngine.cortex,
+        InferenceEngine.cortex_llamacpp,
+        InferenceEngine.cortex_onnx,
+        InferenceEngine.cortex_tensorrtllm,
+        InferenceEngine.cortex_onnx,
+      ]
+        .map((e) => e.toString())
+        .includes(provider)
+    )
+      provider = InferenceEngine.cortex
+
     return this.engines.get(provider) as T | undefined
   }

@@ -27,6 +44,6 @@ export class EngineManager {
    * The instance of the engine manager.
    */
   static instance(): EngineManager {
-    return window.core?.engineManager as EngineManager ?? new EngineManager()
+    return (window.core?.engineManager as EngineManager) ?? new EngineManager()
   }
 }
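Note: a consequence of the remapping above is that any provider string that used to identify a nitro or cortex sub-engine now resolves to the single registered cortex engine, so threads and models created before this change keep working. A hedged sketch of the intended behaviour, assuming a cortex engine has been registered:

const legacy = EngineManager.instance().get(InferenceEngine.nitro)   // 'nitro'
const current = EngineManager.instance().get(InferenceEngine.cortex) // 'cortex'
// legacy === current: both lookups are redirected to InferenceEngine.cortex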
@@ -3,7 +3,7 @@
  */
 import { LocalOAIEngine } from './LocalOAIEngine'
 import { events } from '../../events'
-import { ModelEvent, ModelFile, Model } from '../../../types'
+import { ModelEvent, Model } from '../../../types'
 import { executeOnMain, systemInformation, dirName } from '../../core'

 jest.mock('../../core', () => ({
@@ -43,7 +43,7 @@ describe('LocalOAIEngine', () => {
   })

   it('should load model correctly', async () => {
-    const model: ModelFile = { engine: 'testProvider', file_path: 'path/to/model' } as any
+    const model: any = { engine: 'testProvider', file_path: 'path/to/model' } as any
     const modelFolder = 'path/to'
     const systemInfo = { os: 'testOS' }
     const res = { error: null }
@@ -66,7 +66,7 @@ describe('LocalOAIEngine', () => {
   })

   it('should handle load model error', async () => {
-    const model: ModelFile = { engine: 'testProvider', file_path: 'path/to/model' } as any
+    const model: any = { engine: 'testProvider', file_path: 'path/to/model' } as any
     const modelFolder = 'path/to'
     const systemInfo = { os: 'testOS' }
     const res = { error: 'load error' }
@@ -91,9 +91,7 @@ describe('LocalOAIEngine', () => {

   it('should not unload model if engine does not match', async () => {
     const model: Model = { engine: 'otherProvider' } as any

     await engine.unloadModel(model)

     expect(executeOnMain).not.toHaveBeenCalled()
     expect(events.emit).not.toHaveBeenCalledWith(ModelEvent.OnModelStopped, {})
   })
@@ -1,6 +1,6 @@
-import { executeOnMain, systemInformation, dirName } from '../../core'
+import { executeOnMain, systemInformation, dirName, joinPath, getJanDataFolderPath } from '../../core'
 import { events } from '../../events'
-import { Model, ModelEvent, ModelFile } from '../../../types'
+import { Model, ModelEvent } from '../../../types'
 import { OAIEngine } from './OAIEngine'

 /**
@@ -22,16 +22,16 @@ export abstract class LocalOAIEngine extends OAIEngine {
   override onLoad() {
     super.onLoad()
     // These events are applicable to local inference providers
-    events.on(ModelEvent.OnModelInit, (model: ModelFile) => this.loadModel(model))
+    events.on(ModelEvent.OnModelInit, (model: Model) => this.loadModel(model))
     events.on(ModelEvent.OnModelStop, (model: Model) => this.unloadModel(model))
   }

   /**
    * Load the model.
    */
-  override async loadModel(model: ModelFile): Promise<void> {
+  override async loadModel(model: Model & { file_path?: string }): Promise<void> {
     if (model.engine.toString() !== this.provider) return
-    const modelFolder = await dirName(model.file_path)
+    const modelFolder = 'file_path' in model && model.file_path ? await dirName(model.file_path) : await this.getModelFilePath(model.id)
     const systemInfo = await systemInformation()
     const res = await executeOnMain(
       this.nodeModule,
@@ -63,4 +63,12 @@ export abstract class LocalOAIEngine extends OAIEngine {
       events.emit(ModelEvent.OnModelStopped, {})
     })
   }
+
+  /// Legacy
+  private getModelFilePath = async (
+    id: string,
+  ): Promise<string> => {
+    return joinPath([await getJanDataFolderPath(), 'models', id])
+  }
+  ///
 }
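Note: with the loadModel change above the model folder is resolved in two ways. A rough sketch only, using names from the diff:

// 1. Model imported with an explicit file_path: use its parent directory
//    { id: 'my-model', file_path: '<janData>/models/my-model/model.gguf' } -> dirName(file_path)
// 2. Model without file_path (cortex-managed catalog entry): legacy layout fallback
//    { id: 'my-model' } -> joinPath([janDataFolder, 'models', 'my-model'])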
@@ -55,7 +55,21 @@ export abstract class OAIEngine extends AIEngine {
    * Inference request
    */
   override async inference(data: MessageRequest) {
-    if (data.model?.engine?.toString() !== this.provider) return
+    if (!data.model?.id) {
+      events.emit(MessageEvent.OnMessageResponse, {
+        status: MessageStatus.Error,
+        content: [
+          {
+            type: ContentType.Text,
+            text: {
+              value: 'No model ID provided',
+              annotations: [],
+            },
+          },
+        ],
+      })
+      return
+    }

     const timestamp = Date.now()
     const message: ThreadMessage = {
@@ -89,7 +103,6 @@ export abstract class OAIEngine extends AIEngine {
       model: model.id,
       stream: true,
       ...model.parameters,
-      ...(this.provider === 'nitro' ? { engine: 'cortex.llamacpp'} : {}),
     }
     if (this.transformPayload) {
       requestBody = this.transformPayload(requestBody)
@@ -10,7 +10,7 @@ export function requestInference(
   requestBody: any,
   model: {
     id: string
-    parameters: ModelRuntimeParams
+    parameters?: ModelRuntimeParams
   },
   controller?: AbortController,
   headers?: HeadersInit,
@@ -22,7 +22,7 @@ export function requestInference(
       headers: {
         'Content-Type': 'application/json',
         'Access-Control-Allow-Origin': '*',
-        'Accept': model.parameters.stream ? 'text/event-stream' : 'application/json',
+        'Accept': model.parameters?.stream ? 'text/event-stream' : 'application/json',
         ...headers,
       },
       body: JSON.stringify(requestBody),
@@ -45,7 +45,7 @@ export function requestInference(
         subscriber.complete()
         return
       }
-      if (model.parameters.stream === false) {
+      if (model.parameters?.stream === false) {
         const data = await response.json()
         if (transformResponse) {
           subscriber.next(transformResponse(data))
@@ -1,13 +1,5 @@
 import { BaseExtension, ExtensionTypeEnum } from '../extension'
-import {
-  GpuSetting,
-  HuggingFaceRepoData,
-  ImportingModel,
-  Model,
-  ModelFile,
-  ModelInterface,
-  OptionType,
-} from '../../types'
+import { Model, ModelInterface, OptionType } from '../../types'

 /**
  * Model extension for managing models.
@@ -20,17 +12,10 @@ export abstract class ModelExtension extends BaseExtension implements ModelInter
     return ExtensionTypeEnum.Model
   }

-  abstract downloadModel(
-    model: Model,
-    gpuSettings?: GpuSetting,
-    network?: { proxy: string; ignoreSSL?: boolean }
-  ): Promise<void>
-  abstract cancelModelDownload(modelId: string): Promise<void>
-  abstract deleteModel(model: ModelFile): Promise<void>
-  abstract getDownloadedModels(): Promise<ModelFile[]>
-  abstract getConfiguredModels(): Promise<ModelFile[]>
-  abstract importModels(models: ImportingModel[], optionType: OptionType): Promise<void>
-  abstract updateModelInfo(modelInfo: Partial<ModelFile>): Promise<ModelFile>
-  abstract fetchHuggingFaceRepoData(repoId: string): Promise<HuggingFaceRepoData>
-  abstract getDefaultModel(): Promise<Model>
+  abstract getModels(): Promise<Model[]>
+  abstract pullModel(model: string, id?: string, name?: string): Promise<void>
+  abstract cancelModelPull(modelId: string): Promise<void>
+  abstract importModel(model: string, modePath: string, name?: string, optionType?: OptionType): Promise<void>
+  abstract updateModel(modelInfo: Partial<Model>): Promise<Model>
+  abstract deleteModel(model: string): Promise<void>
 }
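Note: the reshaped ModelExtension surface is cortex-style, pulling and importing by string id instead of downloading ModelFile objects. A hedged usage sketch against the abstract methods above; the model id is illustrative:

// modelExtension: a concrete ModelExtension implementation
await modelExtension.pullModel('llama3.1-8b-gguf')   // download through cortex.cpp
const downloaded = await modelExtension.getModels()  // list locally available models
await modelExtension.updateModel({ id: 'llama3.1-8b-gguf' }) // patch metadata
await modelExtension.deleteModel('llama3.1-8b-gguf')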
@@ -1,32 +1,37 @@
-import * as Core from './core';
-import * as Events from './events';
-import * as FileSystem from './fs';
-import * as Extension from './extension';
-import * as Extensions from './extensions';
-import * as Tools from './tools';
+import * as Core from './core'
+import * as Events from './events'
+import * as FileSystem from './fs'
+import * as Extension from './extension'
+import * as Extensions from './extensions'
+import * as Tools from './tools'
+import * as Models from './models'

 describe('Module Tests', () => {
   it('should export Core module', () => {
     expect(Core).toBeDefined()
   })

   it('should export Event module', () => {
     expect(Events).toBeDefined()
   })

   it('should export Filesystem module', () => {
     expect(FileSystem).toBeDefined()
   })

   it('should export Extension module', () => {
     expect(Extension).toBeDefined()
   })

   it('should export all base extensions', () => {
     expect(Extensions).toBeDefined()
   })

   it('should export all base tools', () => {
     expect(Tools).toBeDefined()
   })
+
+  it('should export all base tools', () => {
+    expect(Models).toBeDefined()
+  })
 })
@@ -33,3 +33,9 @@ export * from './extensions'
  * @module
  */
 export * from './tools'
+
+/**
+ * Export all base models.
+ * @module
+ */
+export * from './models'
core/src/browser/models/index.ts (new file, 10 lines)
@@ -0,0 +1,10 @@
+/**
+ * Export ModelManager
+ * @module
+ */
+export { ModelManager } from './manager'
+
+/**
+ * Export all utils
+ */
+export * from './utils'
core/src/browser/models/manager.ts (new file, 47 lines)
@@ -0,0 +1,47 @@
+import { Model, ModelEvent } from '../../types'
+import { events } from '../events'
+
+/**
+ * Manages the registered models across extensions.
+ */
+export class ModelManager {
+  public models = new Map<string, Model>()
+
+  constructor() {
+    if (window) {
+      window.core.modelManager = this
+    }
+  }
+
+  /**
+   * Registers a model.
+   * @param model - The model to register.
+   */
+  register<T extends Model>(model: T) {
+    if (this.models.has(model.id)) {
+      this.models.set(model.id, {
+        ...model,
+        ...this.models.get(model.id),
+      })
+    } else {
+      this.models.set(model.id, model)
+    }
+    events.emit(ModelEvent.OnModelsUpdate, {})
+  }
+
+  /**
+   * Retrieves a model by it's id.
+   * @param id - The id of the model to retrieve.
+   * @returns The model, if found.
+   */
+  get<T extends Model>(id: string): T | undefined {
+    return this.models.get(id) as T | undefined
+  }
+
+  /**
+   * The instance of the tool manager.
+   */
+  static instance(): ModelManager {
+    return (window.core?.modelManager as ModelManager) ?? new ModelManager()
+  }
+}
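Note: ModelManager is a window-scoped singleton, so every extension and the web app share one registry; register() merges with any existing entry for the same id, keeping the fields that were stored first. A usage sketch, with illustrative model literals cast for brevity (the import path depends on the consumer; the core package re-exports it via './models'):

const manager = ModelManager.instance()
manager.register({ id: 'tinyllama-1.1b', engine: InferenceEngine.cortex_llamacpp } as Model)
manager.register({ id: 'tinyllama-1.1b', name: 'TinyLlama 1.1B' } as unknown as Model)
const merged = manager.get('tinyllama-1.1b') // previously stored fields win over later registrations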
@@ -1,7 +1,10 @@
 // web/utils/modelParam.test.ts
-import { normalizeValue, validationRules } from './modelParam'
-import { extractModelLoadParams } from './modelParam';
-import { extractInferenceParams } from './modelParam';
+import {
+  normalizeValue,
+  validationRules,
+  extractModelLoadParams,
+  extractInferenceParams,
+} from './utils'

 describe('validationRules', () => {
   it('should validate temperature correctly', () => {
@@ -151,13 +154,12 @@ describe('validationRules', () => {
   })
   })

-it('should normalize invalid values for keys not listed in validationRules', () => {
-  expect(normalizeValue('invalid_key', 'invalid')).toBe('invalid')
-  expect(normalizeValue('invalid_key', 123)).toBe(123)
-  expect(normalizeValue('invalid_key', true)).toBe(true)
-  expect(normalizeValue('invalid_key', false)).toBe(false)
-})
-})
+  it('should normalize invalid values for keys not listed in validationRules', () => {
+    expect(normalizeValue('invalid_key', 'invalid')).toBe('invalid')
+    expect(normalizeValue('invalid_key', 123)).toBe(123)
+    expect(normalizeValue('invalid_key', true)).toBe(true)
+    expect(normalizeValue('invalid_key', false)).toBe(false)
+  })
+})

 describe('normalizeValue', () => {
   it('should normalize ctx_len correctly', () => {
@@ -192,19 +194,16 @@ describe('normalizeValue', () => {
   })
 })

-  it('should handle invalid values correctly by falling back to originParams', () => {
-    const modelParams = { temperature: 'invalid', token_limit: -1 };
-    const originParams = { temperature: 0.5, token_limit: 100 };
-    expect(extractInferenceParams(modelParams, originParams)).toEqual(originParams);
-  });
-
-  it('should return an empty object when no modelParams are provided', () => {
-    expect(extractModelLoadParams()).toEqual({});
-  });
-
-  it('should return an empty object when no modelParams are provided', () => {
-    expect(extractInferenceParams()).toEqual({});
-  });
+  it('should handle invalid values correctly by falling back to originParams', () => {
+    const modelParams = { temperature: 'invalid', token_limit: -1 }
+    const originParams = { temperature: 0.5, token_limit: 100 }
+    expect(extractInferenceParams(modelParams as any, originParams)).toEqual(originParams)
+  })
+
+  it('should return an empty object when no modelParams are provided', () => {
+    expect(extractModelLoadParams()).toEqual({})
+  })
+
+  it('should return an empty object when no modelParams are provided', () => {
+    expect(extractInferenceParams()).toEqual({})
+  })
@@ -1,26 +1,20 @@
 /* eslint-disable @typescript-eslint/no-explicit-any */
 /* eslint-disable @typescript-eslint/naming-convention */
-import { ModelRuntimeParams, ModelSettingParams } from '@janhq/core'
-
-import { ModelParams } from '@/types/model'
+import { ModelParams, ModelRuntimeParams, ModelSettingParams } from '../../types'

 /**
  * Validation rules for model parameters
  */
 export const validationRules: { [key: string]: (value: any) => boolean } = {
-  temperature: (value: any) =>
-    typeof value === 'number' && value >= 0 && value <= 2,
+  temperature: (value: any) => typeof value === 'number' && value >= 0 && value <= 2,
   token_limit: (value: any) => Number.isInteger(value) && value >= 0,
   top_k: (value: any) => typeof value === 'number' && value >= 0 && value <= 1,
   top_p: (value: any) => typeof value === 'number' && value >= 0 && value <= 1,
   stream: (value: any) => typeof value === 'boolean',
   max_tokens: (value: any) => Number.isInteger(value) && value >= 0,
-  stop: (value: any) =>
-    Array.isArray(value) && value.every((v) => typeof v === 'string'),
-  frequency_penalty: (value: any) =>
-    typeof value === 'number' && value >= 0 && value <= 1,
-  presence_penalty: (value: any) =>
-    typeof value === 'number' && value >= 0 && value <= 1,
+  stop: (value: any) => Array.isArray(value) && value.every((v) => typeof v === 'string'),
+  frequency_penalty: (value: any) => typeof value === 'number' && value >= 0 && value <= 1,
+  presence_penalty: (value: any) => typeof value === 'number' && value >= 0 && value <= 1,

   ctx_len: (value: any) => Number.isInteger(value) && value >= 0,
   ngl: (value: any) => Number.isInteger(value) && value >= 0,
@@ -76,6 +70,7 @@ export const extractInferenceParams = (
     stop: undefined,
     frequency_penalty: undefined,
     presence_penalty: undefined,
+    engine: undefined,
   }

   const runtimeParams: ModelRuntimeParams = {}
@@ -119,11 +114,18 @@ export const extractModelLoadParams = (
     embedding: undefined,
     n_parallel: undefined,
     cpu_threads: undefined,
+    pre_prompt: undefined,
+    system_prompt: undefined,
+    ai_prompt: undefined,
+    user_prompt: undefined,
     prompt_template: undefined,
+    model_path: undefined,
     llama_model_path: undefined,
     mmproj: undefined,
+    cont_batching: undefined,
     vision_model: undefined,
     text_model: undefined,
+    engine: undefined,
   }
   const settingParams: ModelSettingParams = {}

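Note: extractInferenceParams and extractModelLoadParams split a mixed parameter object into runtime vs. load-time settings, and the new keys (engine, model_path, pre_prompt, cont_batching, …) are now recognised as well. Illustrative sketch only:

const combined: any = { temperature: 0.7, stream: true, ctx_len: 4096, ngl: 33 }
const inference = extractInferenceParams(combined) // runtime keys, e.g. temperature, stream
const load = extractModelLoadParams(combined)      // load-time keys, e.g. ctx_len, ngl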
@@ -8,7 +8,8 @@ jest.mock('../../helper', () => ({

 jest.mock('../../helper/path', () => ({
   validatePath: jest.fn().mockReturnValue('path/to/folder'),
-  normalizeFilePath: () => process.platform === 'win32' ? 'C:\\Users\path\\to\\file.gguf' : '/Users/path/to/file.gguf',
+  normalizeFilePath: () =>
+    process.platform === 'win32' ? 'C:\\Users\\path\\to\\file.gguf' : '/Users/path/to/file.gguf',
 }))

 jest.mock(
@@ -50,11 +50,6 @@ export class Downloader implements Processor {
     const initialDownloadState: DownloadState = {
       modelId,
       fileName,
-      time: {
-        elapsed: 0,
-        remaining: 0,
-      },
-      speed: 0,
       percent: 0,
       size: {
         total: 0,
@@ -343,7 +343,7 @@ export const chatCompletions = async (request: any, reply: any) => {

   // add engine for new cortex cpp engine
   if (requestedModel.engine === 'nitro') {
-    request.body.engine = 'cortex.llamacpp'
+    request.body.engine = 'llama-cpp'
   }

   const fetch = require('node-fetch')
@@ -1,6 +1,5 @@
-import { NITRO_DEFAULT_PORT } from './consts';
-
-it('should test NITRO_DEFAULT_PORT', () => {
-  expect(NITRO_DEFAULT_PORT).toBe(3928);
-});
+import { CORTEX_DEFAULT_PORT } from './consts'
+
+it('should test CORTEX_DEFAULT_PORT', () => {
+  expect(CORTEX_DEFAULT_PORT).toBe(39291)
+})
@@ -1,19 +1,9 @@
 // The PORT to use for the Nitro subprocess
-export const NITRO_DEFAULT_PORT = 3928
+export const CORTEX_DEFAULT_PORT = 39291

 // The HOST address to use for the Nitro subprocess
 export const LOCAL_HOST = '127.0.0.1'

 export const SUPPORTED_MODEL_FORMAT = '.gguf'

-// The URL for the Nitro subprocess
-const NITRO_HTTP_SERVER_URL = `http://${LOCAL_HOST}:${NITRO_DEFAULT_PORT}`
-// The URL for the Nitro subprocess to load a model
-export const NITRO_HTTP_LOAD_MODEL_URL = `${NITRO_HTTP_SERVER_URL}/inferences/server/loadmodel`
-// The URL for the Nitro subprocess to validate a model
-export const NITRO_HTTP_VALIDATE_MODEL_URL = `${NITRO_HTTP_SERVER_URL}/inferences/server/modelstatus`
-
-// The URL for the Nitro subprocess to kill itself
-export const NITRO_HTTP_KILL_URL = `${NITRO_HTTP_SERVER_URL}/processmanager/destroy`
-
-export const DEFAULT_CHAT_COMPLETION_URL = `http://${LOCAL_HOST}:${NITRO_DEFAULT_PORT}/inferences/server/chat_completion` // default nitro url
+export const DEFAULT_CHAT_COMPLETION_URL = `http://${LOCAL_HOST}:${CORTEX_DEFAULT_PORT}/v1/chat/completions` // default nitro url
@@ -1,16 +1,10 @@
 import { startModel } from './startStopModel'

 describe('startModel', () => {
   it('test_startModel_error', async () => {
     const modelId = 'testModelId'
     const settingParams = undefined

-    const result = await startModel(modelId, settingParams)
-
-    expect(result).toEqual({
-      error: expect.any(Error),
-    })
+    expect(startModel(modelId, settingParams)).resolves.toThrow()
   })
 })
@@ -1,6 +1,5 @@
-import { join } from 'path'
-import { getJanDataFolderPath, getJanExtensionsPath, log } from '../../../helper'
 import { ModelSettingParams } from '../../../../types'
+import { CORTEX_DEFAULT_PORT, LOCAL_HOST } from './consts'

 /**
  * Start a model
@@ -9,70 +8,18 @@ import { ModelSettingParams } from '../../../../types'
  * @returns
  */
 export const startModel = async (modelId: string, settingParams?: ModelSettingParams) => {
-  try {
-    await runModel(modelId, settingParams)
-
-    return {
-      message: `Model ${modelId} started`,
-    }
-  } catch (e) {
-    return {
-      error: e,
-    }
-  }
+  return fetch(`http://${LOCAL_HOST}:${CORTEX_DEFAULT_PORT}/v1/models/start`, {
+    method: 'POST',
+    body: JSON.stringify({ model: modelId, ...settingParams }),
+  })
 }

-/**
- * Run a model using installed cortex extension
- * @param model
- * @param settingParams
- */
-const runModel = async (model: string, settingParams?: ModelSettingParams): Promise<void> => {
-  const janDataFolderPath = getJanDataFolderPath()
-  const modelFolder = join(janDataFolderPath, 'models', model)
-  let module = join(
-    getJanExtensionsPath(),
-    '@janhq',
-    'inference-cortex-extension',
-    'dist',
-    'node',
-    'index.cjs'
-  )
-  // Just reuse the cortex extension implementation, don't duplicate then lost of sync
-  return import(module).then((extension) =>
-    extension
-      .loadModel(
-        {
-          modelFolder,
-          model,
-        },
-        settingParams
-      )
-      .then(() => log(`[SERVER]::Debug: Model is loaded`))
-      .then({
-        message: 'Model started',
-      })
-  )
-}
-
 /*
- * Stop model and kill nitro process.
+ * Stop model.
  */
-export const stopModel = async (_modelId: string) => {
-  let module = join(
-    getJanExtensionsPath(),
-    '@janhq',
-    'inference-cortex-extension',
-    'dist',
-    'node',
-    'index.cjs'
-  )
-  // Just reuse the cortex extension implementation, don't duplicate then lost of sync
-  return import(module).then((extension) =>
-    extension
-      .unloadModel()
-      .then(() => log(`[SERVER]::Debug: Model is unloaded`))
-      .then({
-        message: 'Model stopped',
-      })
-  )
+export const stopModel = async (modelId: string) => {
+  return fetch(`http://${LOCAL_HOST}:${CORTEX_DEFAULT_PORT}/v1/models/stop`, {
+    method: 'POST',
+    body: JSON.stringify({ model: modelId }),
+  })
 }
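Note: the local API server no longer imports the cortex extension bundle in-process; it simply proxies model lifecycle calls to the cortex.cpp subprocess. For reference, the two requests it now makes (host and port from consts.ts above):

// POST http://127.0.0.1:39291/v1/models/start   body: { "model": "<modelId>", ...settingParams }
// POST http://127.0.0.1:39291/v1/models/stop    body: { "model": "<modelId>" }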
@@ -72,6 +72,8 @@ export enum DownloadEvent {
   onFileDownloadUpdate = 'onFileDownloadUpdate',
   onFileDownloadError = 'onFileDownloadError',
   onFileDownloadSuccess = 'onFileDownloadSuccess',
+  onFileDownloadStopped = 'onFileDownloadStopped',
+  onFileDownloadStarted = 'onFileDownloadStarted',
   onFileUnzipSuccess = 'onFileUnzipSuccess',
 }
@@ -6,8 +6,8 @@ export type FileStat = {
 export type DownloadState = {
   modelId: string // TODO: change to download id
   fileName: string
-  time: DownloadTime
-  speed: number
+  time?: DownloadTime
+  speed?: number

   percent: number
   size: DownloadSize
@@ -6,8 +6,8 @@ import { FileMetadata } from '../file'
  */
 export type ModelInfo = {
   id: string
-  settings: ModelSettingParams
-  parameters: ModelRuntimeParams
+  settings?: ModelSettingParams
+  parameters?: ModelRuntimeParams
   engine?: InferenceEngine
 }

@@ -15,7 +15,6 @@ export type ModelInfo = {
  * Represents the inference engine.
  * @stored
  */
-
 export enum InferenceEngine {
   anthropic = 'anthropic',
   mistral = 'mistral',
@@ -28,11 +27,13 @@ export enum InferenceEngine {
   nitro_tensorrt_llm = 'nitro-tensorrt-llm',
   cohere = 'cohere',
   nvidia = 'nvidia',
-  cortex_llamacpp = 'cortex.llamacpp',
-  cortex_onnx = 'cortex.onnx',
-  cortex_tensorrtllm = 'cortex.tensorrt-llm',
+  cortex = 'cortex',
+  cortex_llamacpp = 'llama-cpp',
+  cortex_onnx = 'onnxruntime',
+  cortex_tensorrtllm = 'tensorrt-llm',
 }

+// Represents an artifact of a model, including its filename and URL
 export type ModelArtifact = {
   filename: string
   url: string
@@ -104,6 +105,7 @@ export type Model = {
   engine: InferenceEngine
 }

+// Represents metadata associated with a model
 export type ModelMetadata = {
   author: string
   tags: string[]
@@ -124,14 +126,20 @@ export type ModelSettingParams = {
   n_parallel?: number
   cpu_threads?: number
   prompt_template?: string
+  pre_prompt?: string
   system_prompt?: string
   ai_prompt?: string
   user_prompt?: string
+  // path param
+  model_path?: string
+  // legacy path param
   llama_model_path?: string
+  // clip model path
   mmproj?: string
   cont_batching?: boolean
   vision_model?: boolean
   text_model?: boolean
+  engine?: boolean
 }

 /**
@@ -150,11 +158,12 @@ export type ModelRuntimeParams = {
   engine?: string
 }

+// Represents a model that failed to initialize, including the error
 export type ModelInitFailed = Model & {
   error: Error
 }

 /**
- * ModelFile is the model.json entity and it's file metadata
+ * ModelParams types
  */
-export type ModelFile = Model & FileMetadata
+export type ModelParams = ModelRuntimeParams | ModelSettingParams
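Note: the InferenceEngine renames change the strings stored in model.json, so values written by older Jan versions no longer equal the new enum values; the EngineManager remapping shown earlier is what keeps those models usable. Summary of the value changes:

// cortex_llamacpp:    'cortex.llamacpp'     -> 'llama-cpp'
// cortex_onnx:        'cortex.onnx'         -> 'onnxruntime'
// cortex_tensorrtllm: 'cortex.tensorrt-llm' -> 'tensorrt-llm'
// new umbrella value: cortex = 'cortex'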
@@ -1,4 +1,4 @@
-export type OptionType = 'SYMLINK' | 'MOVE_BINARY_FILE'
+export type OptionType = 'symlink' | 'copy'

 export type ModelImportOption = {
   type: OptionType
@@ -1,5 +1,5 @@
-import { GpuSetting } from '../miscellaneous'
-import { Model, ModelFile } from './modelEntity'
+import { Model } from './modelEntity'
+import { OptionType } from './modelImport'

 /**
  * Model extension for managing models.
@@ -8,38 +8,46 @@ export interface ModelInterface {
   /**
    * Downloads a model.
    * @param model - The model to download.
-   * @param network - Optional object to specify proxy/whether to ignore SSL certificates.
    * @returns A Promise that resolves when the model has been downloaded.
    */
-  downloadModel(
-    model: ModelFile,
-    gpuSettings?: GpuSetting,
-    network?: { ignoreSSL?: boolean; proxy?: string }
-  ): Promise<void>
+  pullModel(model: string, id?: string, name?: string): Promise<void>

   /**
    * Cancels the download of a specific model.
    * @param {string} modelId - The ID of the model to cancel the download for.
    * @returns {Promise<void>} A promise that resolves when the download has been cancelled.
    */
-  cancelModelDownload(modelId: string): Promise<void>
+  cancelModelPull(model: string): Promise<void>

   /**
    * Deletes a model.
    * @param modelId - The ID of the model to delete.
    * @returns A Promise that resolves when the model has been deleted.
    */
-  deleteModel(model: ModelFile): Promise<void>
+  deleteModel(model: string): Promise<void>

   /**
-   * Gets a list of downloaded models.
+   * Gets downloaded models.
    * @returns A Promise that resolves with an array of downloaded models.
    */
-  getDownloadedModels(): Promise<ModelFile[]>
+  getModels(): Promise<Model[]>

   /**
-   * Gets a list of configured models.
-   * @returns A Promise that resolves with an array of configured models.
+   * Update a pulled model's metadata
+   * @param model - The model to update.
+   * @returns A Promise that resolves when the model has been updated.
    */
-  getConfiguredModels(): Promise<ModelFile[]>
+  updateModel(model: Partial<Model>): Promise<Model>
+
+  /**
+   * Import an existing model file.
+   * @param model id of the model to import
+   * @param modelPath - path of the model file
+   */
+  importModel(
+    model: string,
+    modePath: string,
+    name?: string,
+    optionType?: OptionType
+  ): Promise<void>
 }
@@ -1,16 +1,13 @@
-import * as monitoringInterface from './monitoringInterface';
-import * as resourceInfo from './resourceInfo';
-
-import * as index from './index';
-import * as monitoringInterface from './monitoringInterface';
-import * as resourceInfo from './resourceInfo';
+import * as monitoringInterface from './monitoringInterface'
+import * as resourceInfo from './resourceInfo'
+
+import * as index from './index'

 it('should re-export all symbols from monitoringInterface and resourceInfo', () => {
   for (const key in monitoringInterface) {
-    expect(index[key]).toBe(monitoringInterface[key]);
+    expect(index[key]).toBe(monitoringInterface[key])
   }
   for (const key in resourceInfo) {
-    expect(index[key]).toBe(resourceInfo[key]);
+    expect(index[key]).toBe(resourceInfo[key])
   }
-});
+})
@@ -18,7 +18,8 @@
     "docs/**/*",
     "scripts/**/*",
     "icons/**/*",
-    "themes"
+    "themes",
+    "shared"
   ],
   "asarUnpack": [
     "pre-install",
@@ -26,7 +27,8 @@
     "docs",
     "scripts",
     "icons",
-    "themes"
+    "themes",
+    "shared"
   ],
   "publish": [
     {
electron/shared/.gitkeep (new empty file)
@@ -15,6 +15,8 @@ import {
 import { Constants } from './constants'
 import { HubPage } from '../pages/hubPage'
 import { CommonActions } from '../pages/commonActions'
+import { rmSync } from 'fs'
+import * as path from 'path'

 export let electronApp: ElectronApplication
 export let page: Page
@@ -103,10 +105,14 @@ export const test = base.extend<
     },
     { auto: true },
   ],
 })

 test.beforeAll(async () => {
+  await rmSync(path.join(__dirname, '../../test-data'), {
+    recursive: true,
+    force: true,
+  })
+
   test.setTimeout(TIMEOUT)
   await setupElectron()
   await page.waitForSelector('img[alt="Jan - Logo"]', {
@@ -16,7 +16,8 @@ test.beforeAll(async () => {
 test('explores hub', async ({ hubPage }) => {
   await hubPage.navigateByMenu()
   await hubPage.verifyContainerVisible()
-  const useModelBtn= page.getByTestId(/^use-model-btn-.*/).first()
+  await hubPage.scrollToBottom()
+  const useModelBtn = page.getByTestId(/^use-model-btn-.*/).first()

   await expect(useModelBtn).toBeVisible({
     timeout: TIMEOUT,
@@ -8,9 +8,8 @@ export class BasePage {
   constructor(
     protected readonly page: Page,
     readonly action: CommonActions,
-    protected containerId: string,
-  ) {
-  }
+    protected containerId: string
+  ) {}

   public getValue(key: string) {
     return this.action.getValue(key)
@@ -37,6 +36,12 @@ export class BasePage {
     expect(container.isVisible()).toBeTruthy()
   }

+  async scrollToBottom() {
+    await this.page.evaluate(() => {
+      window.scrollTo(0, document.body.scrollHeight)
+    })
+  }
+
   async waitUpdateLoader() {
     await this.isElementVisible('img[alt="Jan - Logo"]')
   }
@@ -63,39 +63,46 @@ export default class JanAssistantExtension extends AssistantExtension {
   }

   async getAssistants(): Promise<Assistant[]> {
+    try {
       // get all the assistant directories
       // get all the assistant metadata json
       const results: Assistant[] = []

       const allFileName: string[] = await fs.readdirSync(
         JanAssistantExtension._homeDir
       )
       for (const fileName of allFileName) {
         const filePath = await joinPath([
           JanAssistantExtension._homeDir,
           fileName,
         ])

         if (!(await fs.fileStat(filePath))?.isDirectory) continue
         const jsonFiles: string[] = (await fs.readdirSync(filePath)).filter(
           (file: string) => file === 'assistant.json'
         )

         if (jsonFiles.length !== 1) {
           // has more than one assistant file -> ignore
           continue
         }

         const content = await fs.readFileSync(
           await joinPath([filePath, jsonFiles[0]]),
           'utf-8'
         )
         const assistant: Assistant =
           typeof content === 'object' ? content : JSON.parse(content)

         results.push(assistant)
       }

       return results
+    } catch (err) {
+      console.debug(err)
+      return [this.defaultAssistant]
+    }
   }

   async deleteAssistant(assistant: Assistant): Promise<void> {
@@ -112,39 +119,39 @@ export default class JanAssistantExtension extends AssistantExtension {
   }

   private async createJanAssistant(): Promise<void> {
-    const janAssistant: Assistant = {
+    await this.createAssistant(this.defaultAssistant)
+  }
+
+  private defaultAssistant: Assistant = {
     avatar: '',
     thread_location: undefined,
     id: 'jan',
     object: 'assistant',
     created_at: Date.now(),
     name: 'Jan',
     description: 'A default assistant that can use all downloaded models',
     model: '*',
     instructions: '',
     tools: [
       {
         type: 'retrieval',
         enabled: false,
         useTimeWeightedRetriever: false,
         settings: {
           top_k: 2,
           chunk_size: 1024,
           chunk_overlap: 64,
           retrieval_template: `Use the following pieces of context to answer the question at the end. If you don't know the answer, just say that you don't know, don't try to make up an answer.
 ----------------
 CONTEXT: {CONTEXT}
 ----------------
 QUESTION: {QUESTION}
 ----------------
 Helpful Answer:`,
         },
       },
     ],
     file_ids: [],
     metadata: undefined,
-    }
-
-    await this.createAssistant(janAssistant)
   }
 }
@ -10,8 +10,6 @@ import { HNSWLib } from 'langchain/vectorstores/hnswlib'
|
|||||||
import { OpenAIEmbeddings } from 'langchain/embeddings/openai'
|
import { OpenAIEmbeddings } from 'langchain/embeddings/openai'
|
||||||
import { readEmbeddingEngine } from './engine'
|
import { readEmbeddingEngine } from './engine'
|
||||||
|
|
||||||
import path from 'path'
|
|
||||||
|
|
||||||
export class Retrieval {
|
export class Retrieval {
|
||||||
public chunkSize: number = 100
|
public chunkSize: number = 100
|
||||||
public chunkOverlap?: number = 0
|
public chunkOverlap?: number = 0
|
||||||
|
|||||||
1  extensions/inference-cortex-extension/bin/version.txt  Normal file
@ -0,0 +1 @@
1.0.2-rc4
41  extensions/inference-cortex-extension/download.bat  Normal file
@ -0,0 +1,41 @@
@echo off
set BIN_PATH=./bin
set SHARED_PATH=./../../electron/shared
set /p CORTEX_VERSION=<./bin/version.txt

@REM Download cortex.llamacpp binaries
set VERSION=v0.1.35
set DOWNLOAD_URL=https://github.com/janhq/cortex.llamacpp/releases/download/%VERSION%/cortex.llamacpp-0.1.35-windows-amd64
set CUDA_DOWNLOAD_URL=https://github.com/janhq/cortex.llamacpp/releases/download/%VERSION%
set SUBFOLDERS=win-cuda-12-0 win-cuda-11-7 win-noavx win-avx win-avx2 win-avx512 win-vulkan

call .\node_modules\.bin\download -e --strip 1 -o %BIN_PATH% https://github.com/janhq/cortex/releases/download/v%CORTEX_VERSION%/cortex-%CORTEX_VERSION%-windows-amd64.tar.gz
call .\node_modules\.bin\download %DOWNLOAD_URL%-avx2-cuda-12-0.tar.gz -e --strip 1 -o %BIN_PATH%/avx2-cuda-12-0/engines/cortex.llamacpp
call .\node_modules\.bin\download %DOWNLOAD_URL%-avx2-cuda-11-7.tar.gz -e --strip 1 -o %BIN_PATH%/avx2-cuda-11-7/engines/cortex.llamacpp
call .\node_modules\.bin\download %DOWNLOAD_URL%-noavx-cuda-12-0.tar.gz -e --strip 1 -o %BIN_PATH%/noavx-cuda-12-0/engines/cortex.llamacpp
call .\node_modules\.bin\download %DOWNLOAD_URL%-noavx-cuda-11-7.tar.gz -e --strip 1 -o %BIN_PATH%/noavx-cuda-11-7/engines/cortex.llamacpp
call .\node_modules\.bin\download %DOWNLOAD_URL%-noavx.tar.gz -e --strip 1 -o %BIN_PATH%/noavx/engines/cortex.llamacpp
call .\node_modules\.bin\download %DOWNLOAD_URL%-avx.tar.gz -e --strip 1 -o %BIN_PATH%/avx/engines/cortex.llamacpp
call .\node_modules\.bin\download %DOWNLOAD_URL%-avx2.tar.gz -e --strip 1 -o %BIN_PATH%/avx2/engines/cortex.llamacpp
call .\node_modules\.bin\download %DOWNLOAD_URL%-avx512.tar.gz -e --strip 1 -o %BIN_PATH%/avx512/engines/cortex.llamacpp
call .\node_modules\.bin\download %DOWNLOAD_URL%-vulkan.tar.gz -e --strip 1 -o %BIN_PATH%/vulkan/engines/cortex.llamacpp
call .\node_modules\.bin\download %CUDA_DOWNLOAD_URL%/cuda-12-0-windows-amd64.tar.gz -e --strip 1 -o %SHARED_PATH%
call .\node_modules\.bin\download %CUDA_DOWNLOAD_URL%/cuda-11-7-windows-amd64.tar.gz -e --strip 1 -o %SHARED_PATH%

move %BIN_PATH%\cortex-server-beta.exe %BIN_PATH%\cortex-server.exe
del %BIN_PATH%\cortex-beta.exe
del %BIN_PATH%\cortex.exe

@REM Loop through each folder and move DLLs (excluding engine.dll)
for %%F in (%SUBFOLDERS%) do (
    echo Processing folder: %BIN_PATH%\%%F

    @REM Move all .dll files except engine.dll
    for %%D in (%BIN_PATH%\%%F\engines\cortex.llamacpp\*.dll) do (
        if /I not "%%~nxD"=="engine.dll" (
            move "%%D" "%BIN_PATH%"
        )
    )
)

echo DLL files moved successfully.
47  extensions/inference-cortex-extension/download.sh  Executable file
@ -0,0 +1,47 @@
#!/bin/bash

# Read CORTEX_VERSION
CORTEX_VERSION=$(cat ./bin/version.txt)
CORTEX_RELEASE_URL="https://github.com/janhq/cortex/releases/download"
ENGINE_DOWNLOAD_URL="https://github.com/janhq/cortex.llamacpp/releases/download/v0.1.35/cortex.llamacpp-0.1.35"
CUDA_DOWNLOAD_URL="https://github.com/janhq/cortex.llamacpp/releases/download/v0.1.35"
# Detect platform
OS_TYPE=$(uname)

if [ "$OS_TYPE" == "Linux" ]; then
    # Linux downloads
    download "${CORTEX_RELEASE_URL}/v${CORTEX_VERSION}/cortex-${CORTEX_VERSION}-linux-amd64.tar.gz" -e --strip 1 -o "./bin"
    mv ./bin/cortex-server-beta ./bin/cortex-server
    rm -rf ./bin/cortex
    rm -rf ./bin/cortex-beta
    chmod +x "./bin/cortex-server"

    # Download engines for Linux
    download "${ENGINE_DOWNLOAD_URL}-linux-amd64-noavx.tar.gz" -e --strip 1 -o "./bin/noavx/engines/cortex.llamacpp" 1
    download "${ENGINE_DOWNLOAD_URL}-linux-amd64-avx.tar.gz" -e --strip 1 -o "./bin/avx/engines/cortex.llamacpp" 1
    download "${ENGINE_DOWNLOAD_URL}-linux-amd64-avx2.tar.gz" -e --strip 1 -o "./bin/avx2/engines/cortex.llamacpp" 1
    download "${ENGINE_DOWNLOAD_URL}-linux-amd64-avx512.tar.gz" -e --strip 1 -o "./bin/avx512/engines/cortex.llamacpp" 1
    download "${ENGINE_DOWNLOAD_URL}-linux-amd64-avx2-cuda-12-0.tar.gz" -e --strip 1 -o "./bin/avx2-cuda-12-0/engines/cortex.llamacpp" 1
    download "${ENGINE_DOWNLOAD_URL}-linux-amd64-avx2-cuda-11-7.tar.gz" -e --strip 1 -o "./bin/avx2-cuda-11-7/engines/cortex.llamacpp" 1
    download "${ENGINE_DOWNLOAD_URL}-linux-amd64-noavx-cuda-12-0.tar.gz" -e --strip 1 -o "./bin/noavx-cuda-12-0/engines/cortex.llamacpp" 1
    download "${ENGINE_DOWNLOAD_URL}-linux-amd64-noavx-cuda-11-7.tar.gz" -e --strip 1 -o "./bin/noavx-cuda-11-7/engines/cortex.llamacpp" 1
    download "${ENGINE_DOWNLOAD_URL}-linux-amd64-vulkan.tar.gz" -e --strip 1 -o "./bin/vulkan/engines/cortex.llamacpp" 1
    download "${CUDA_DOWNLOAD_URL}/cuda-12-0-linux-amd64.tar.gz" -e --strip 1 -o "../../electron/shared" 1
    download "${CUDA_DOWNLOAD_URL}/cuda-11-7-linux-amd64.tar.gz" -e --strip 1 -o "../../electron/shared" 1

elif [ "$OS_TYPE" == "Darwin" ]; then
    # macOS downloads
    download "${CORTEX_RELEASE_URL}/v${CORTEX_VERSION}/cortex-${CORTEX_VERSION}-mac-universal.tar.gz" -e --strip 1 -o "./bin" 1
    mv ./bin/cortex-server-beta ./bin/cortex-server
    rm -rf ./bin/cortex
    rm -rf ./bin/cortex-beta
    chmod +x "./bin/cortex-server"

    # Download engines for macOS
    download "${ENGINE_DOWNLOAD_URL}-mac-arm64.tar.gz" -e --strip 1 -o ./bin/arm64/engines/cortex.llamacpp
    download "${ENGINE_DOWNLOAD_URL}-mac-amd64.tar.gz" -e --strip 1 -o ./bin/x64/engines/cortex.llamacpp

else
    echo "Unsupported operating system: $OS_TYPE"
    exit 1
fi
@ -10,12 +10,12 @@
   "scripts": {
     "test": "jest",
     "build": "tsc --module commonjs && rollup -c rollup.config.ts",
-    "downloadnitro:linux:darwin": "./download.sh",
-    "downloadnitro:win32": "download.bat",
-    "downloadnitro": "run-script-os",
-    "build:publish:darwin": "rimraf *.tgz --glob && yarn build && npm run downloadnitro && ../../.github/scripts/auto-sign.sh && cpx \"bin/**\" \"dist/bin\" && npm pack && cpx *.tgz ../../pre-install",
-    "build:publish:win32:linux": "rimraf *.tgz --glob && yarn build && npm run downloadnitro && cpx \"bin/**\" \"dist/bin\" && npm pack && cpx *.tgz ../../pre-install",
-    "build:publish": "yarn test && run-script-os"
+    "downloadcortex:linux:darwin": "./download.sh",
+    "downloadcortex:win32": "download.bat",
+    "downloadcortex": "run-script-os",
+    "build:publish:darwin": "rimraf *.tgz --glob && yarn build && npm run downloadcortex && ../../.github/scripts/auto-sign.sh && cpx \"bin/**\" \"dist/bin\" && npm pack && cpx *.tgz ../../pre-install",
+    "build:publish:win32:linux": "rimraf *.tgz --glob && yarn build && npm run downloadcortex && cpx \"bin/**\" \"dist/bin\" && npm pack && cpx *.tgz ../../pre-install",
+    "build:publish": "run-script-os"
   },
   "exports": {
     ".": "./dist/index.js",
@ -50,6 +50,8 @@
     "cpu-instructions": "^0.0.13",
     "decompress": "^4.2.1",
     "fetch-retry": "^5.0.6",
+    "ky": "^1.7.2",
+    "p-queue": "^8.0.1",
     "rxjs": "^7.8.1",
     "tcp-port-used": "^1.0.2",
     "terminate": "2.6.1",
@ -31,5 +31,5 @@
"tags": ["34B", "Finetuned"],
"size": 21556982144
},
- "engine": "nitro"
+ "engine": "llama-cpp"
}
@ -31,5 +31,5 @@
"tags": ["7B", "Finetuned"],
"size": 5056982144
},
- "engine": "nitro"
+ "engine": "llama-cpp"
}
@ -31,5 +31,5 @@
"tags": ["Vision"],
"size": 5750000000
},
- "engine": "nitro"
+ "engine": "llama-cpp"
}
@ -30,5 +30,5 @@
"tags": ["7B", "Finetuned"],
"size": 4370000000
},
- "engine": "nitro"
+ "engine": "llama-cpp"
}
@ -31,6 +31,6 @@
"tags": ["22B", "Finetuned", "Featured"],
"size": 13341237440
},
- "engine": "nitro"
+ "engine": "llama-cpp"
}
@ -31,6 +31,6 @@
"tags": ["34B", "Finetuned"],
"size": 21500000000
},
- "engine": "nitro"
+ "engine": "llama-cpp"
}
@ -31,5 +31,5 @@
"tags": ["Tiny"],
"size": 1430000000
},
- "engine": "nitro"
+ "engine": "llama-cpp"
}
@ -31,5 +31,5 @@
"tags": ["33B"],
"size": 19940000000
},
- "engine": "nitro"
+ "engine": "llama-cpp"
}
@ -31,5 +31,5 @@
"tags": ["2B", "Finetuned", "Tiny"],
"size": 1630000000
},
- "engine": "nitro"
+ "engine": "llama-cpp"
}
@ -31,5 +31,5 @@
"tags": ["7B", "Finetuned"],
"size": 5330000000
},
- "engine": "nitro"
+ "engine": "llama-cpp"
}
@ -37,5 +37,5 @@
],
"size": 16600000000
},
- "engine": "nitro"
+ "engine": "llama-cpp"
}
@ -38,5 +38,5 @@
],
"size": 1710000000
},
- "engine": "nitro"
+ "engine": "llama-cpp"
}
@ -37,5 +37,5 @@
],
"size": 5760000000
},
- "engine": "nitro"
+ "engine": "llama-cpp"
}
@ -31,5 +31,5 @@
"tags": ["70B", "Foundational Model"],
"size": 43920000000
},
- "engine": "nitro"
+ "engine": "llama-cpp"
}
@ -31,5 +31,5 @@
"tags": ["7B", "Foundational Model"],
"size": 4080000000
},
- "engine": "nitro"
+ "engine": "llama-cpp"
}
@ -31,5 +31,5 @@
"tags": ["8B"],
"size": 4920000000
},
- "engine": "nitro"
+ "engine": "llama-cpp"
}
@ -34,5 +34,5 @@
],
"size": 4920000000
},
- "engine": "nitro"
+ "engine": "llama-cpp"
}
@ -37,5 +37,5 @@
],
"size": 42500000000
},
- "engine": "nitro"
+ "engine": "llama-cpp"
}
@ -37,5 +37,5 @@
],
"size": 4920000000
},
- "engine": "nitro"
+ "engine": "llama-cpp"
}
@ -31,5 +31,5 @@
"tags": ["1B", "Featured"],
"size": 1320000000
},
- "engine": "nitro"
+ "engine": "llama-cpp"
}
@ -31,5 +31,5 @@
"tags": ["3B", "Featured"],
"size": 3420000000
},
- "engine": "nitro"
+ "engine": "llama-cpp"
}
@ -34,5 +34,5 @@
],
"size": 1170000000
},
- "engine": "nitro"
+ "engine": "llama-cpp"
}
@ -32,5 +32,5 @@
"tags": ["Vision"],
"size": 7870000000
},
- "engine": "nitro"
+ "engine": "llama-cpp"
}
@ -32,5 +32,5 @@
"tags": ["Vision"],
"size": 4370000000
},
- "engine": "nitro"
+ "engine": "llama-cpp"
}
@ -32,5 +32,5 @@
"size": 4370000000,
"cover": "https://raw.githubusercontent.com/janhq/jan/dev/models/mistral-ins-7b-q4/cover.png"
},
- "engine": "nitro"
+ "engine": "llama-cpp"
}
@ -30,5 +30,5 @@
"tags": ["70B", "Foundational Model"],
"size": 26440000000
},
- "engine": "nitro"
+ "engine": "llama-cpp"
}
@ -31,5 +31,5 @@
"tags": ["7B", "Finetuned"],
"size": 4370000000
},
- "engine": "nitro"
+ "engine": "llama-cpp"
}
@ -31,5 +31,5 @@
"tags": ["Recommended", "7B", "Finetuned"],
"size": 4370000000
},
- "engine": "nitro"
+ "engine": "llama-cpp"
}
@ -34,5 +34,5 @@
],
"size": 2320000000
},
- "engine": "nitro"
+ "engine": "llama-cpp"
}
@ -34,5 +34,5 @@
],
"size": 8366000000
},
- "engine": "nitro"
+ "engine": "llama-cpp"
}
@ -31,5 +31,5 @@
"tags": ["34B", "Finetuned"],
"size": 20220000000
},
- "engine": "nitro"
+ "engine": "llama-cpp"
}
@ -31,5 +31,5 @@
"tags": ["7B", "Finetuned"],
"size": 4770000000
},
- "engine": "nitro"
+ "engine": "llama-cpp"
}
@ -31,6 +31,6 @@
"tags": ["7B", "Finetuned"],
"size": 4680000000
},
- "engine": "nitro"
+ "engine": "llama-cpp"
}
@ -31,6 +31,6 @@
"tags": ["14B", "Featured"],
"size": 8990000000
},
- "engine": "nitro"
+ "engine": "llama-cpp"
}
@ -31,6 +31,6 @@
"tags": ["32B"],
"size": 19900000000
},
- "engine": "nitro"
+ "engine": "llama-cpp"
}
@ -31,6 +31,6 @@
"tags": ["72B"],
"size": 47400000000
},
- "engine": "nitro"
+ "engine": "llama-cpp"
}
@ -31,6 +31,6 @@
"tags": ["7B", "Featured"],
"size": 4680000000
},
- "engine": "nitro"
+ "engine": "llama-cpp"
}
@ -31,6 +31,6 @@
"tags": ["7B", "Featured"],
"size": 4680000000
},
- "engine": "nitro"
+ "engine": "llama-cpp"
}
@ -31,5 +31,5 @@
"tags": ["3B", "Finetuned", "Tiny"],
"size": 2970000000
},
- "engine": "nitro"
+ "engine": "llama-cpp"
}
@ -30,5 +30,5 @@
"tags": ["7B", "Finetuned"],
"size": 4370000000
},
- "engine": "nitro"
+ "engine": "llama-cpp"
}
@ -31,5 +31,5 @@
"tags": ["Tiny", "Foundation Model"],
"size": 669000000
},
- "engine": "nitro"
+ "engine": "llama-cpp"
}
@ -31,5 +31,5 @@
"size": 4370000000,
"cover": "https://raw.githubusercontent.com/janhq/jan/dev/models/trinity-v1.2-7b/cover.png"
},
- "engine": "nitro"
+ "engine": "llama-cpp"
}
@ -31,6 +31,6 @@
"tags": ["7B", "Finetuned"],
"size": 4410000000
},
- "engine": "nitro"
+ "engine": "llama-cpp"
}
@ -31,5 +31,5 @@
"tags": ["Recommended", "13B", "Finetuned"],
"size": 7870000000
},
- "engine": "nitro"
+ "engine": "llama-cpp"
}
@ -31,5 +31,5 @@
"tags": ["34B", "Foundational Model"],
"size": 20660000000
},
- "engine": "nitro"
+ "engine": "llama-cpp"
}
@ -114,19 +114,7 @@ export default [
      ]),
      NODE: JSON.stringify(`${packageJson.name}/${packageJson.node}`),
      DEFAULT_SETTINGS: JSON.stringify(defaultSettingJson),
-      INFERENCE_URL: JSON.stringify(
-        process.env.INFERENCE_URL ||
-          'http://127.0.0.1:3928/inferences/server/chat_completion'
-      ),
-      TROUBLESHOOTING_URL: JSON.stringify(
-        'https://jan.ai/guides/troubleshooting'
-      ),
-      JAN_SERVER_INFERENCE_URL: JSON.stringify(
-        'http://localhost:1337/v1/chat/completions'
-      ),
-      CUDA_DOWNLOAD_URL: JSON.stringify(
-        'https://catalog.jan.ai/dist/cuda-dependencies/<version>/<platform>/cuda.tar.gz'
-      ),
+      CORTEX_API_URL: JSON.stringify('http://127.0.0.1:39291'),
    }),
    // Allow json resolution
    json(),
@ -1,7 +1,5 @@
 declare const NODE: string
-declare const INFERENCE_URL: string
-declare const TROUBLESHOOTING_URL: string
-declare const JAN_SERVER_INFERENCE_URL: string
+declare const CORTEX_API_URL: string
 declare const DEFAULT_SETTINGS: Array<any>
 declare const MODELS: Array<any>
168  extensions/inference-cortex-extension/src/index.ts  Normal file
@ -0,0 +1,168 @@
/**
 * @file This file exports a class that implements the InferenceExtension interface from the @janhq/core package.
 * The class provides methods for initializing and stopping a model, and for making inference requests.
 * It also subscribes to events emitted by the @janhq/core package and handles new message requests.
 * @version 1.0.0
 * @module inference-extension/src/index
 */

import {
  Model,
  executeOnMain,
  systemInformation,
  joinPath,
  dirName,
  LocalOAIEngine,
  InferenceEngine,
  getJanDataFolderPath,
  extractModelLoadParams,
} from '@janhq/core'
import PQueue from 'p-queue'
import ky from 'ky'

/**
 * A class that implements the InferenceExtension interface from the @janhq/core package.
 * The class provides methods for initializing and stopping a model, and for making inference requests.
 * It also subscribes to events emitted by the @janhq/core package and handles new message requests.
 */
export default class JanInferenceCortexExtension extends LocalOAIEngine {
  // DEPRECATED
  nodeModule: string = 'node'

  queue = new PQueue({ concurrency: 1 })

  provider: string = InferenceEngine.cortex

  /**
   * The URL for making inference requests.
   */
  inferenceUrl = `${CORTEX_API_URL}/v1/chat/completions`

  /**
   * Subscribes to events emitted by the @janhq/core package.
   */
  async onLoad() {
    const models = MODELS as Model[]

    this.registerModels(models)

    super.onLoad()

    // Run the process watchdog
    const systemInfo = await systemInformation()
    await this.clean()
    await executeOnMain(NODE, 'run', systemInfo)

    this.queue.add(() => this.healthz())

    window.addEventListener('beforeunload', () => {
      this.clean()
    })
  }

  onUnload(): void {
    this.clean()
    executeOnMain(NODE, 'dispose')
    super.onUnload()
  }

  override async loadModel(
    model: Model & { file_path?: string }
  ): Promise<void> {
    if (
      model.engine === InferenceEngine.nitro &&
      model.settings.llama_model_path
    ) {
      // Legacy chat model support
      model.settings = {
        ...model.settings,
        llama_model_path: await getModelFilePath(
          model,
          model.settings.llama_model_path
        ),
      }
    } else {
      const { llama_model_path, ...settings } = model.settings
      model.settings = settings
    }

    if (model.engine === InferenceEngine.nitro && model.settings.mmproj) {
      // Legacy clip vision model support
      model.settings = {
        ...model.settings,
        mmproj: await getModelFilePath(model, model.settings.mmproj),
      }
    } else {
      const { mmproj, ...settings } = model.settings
      model.settings = settings
    }

    return await ky
      .post(`${CORTEX_API_URL}/v1/models/start`, {
        json: {
          ...extractModelLoadParams(model.settings),
          model: model.id,
          engine:
            model.engine === InferenceEngine.nitro // Legacy model cache
              ? InferenceEngine.cortex_llamacpp
              : model.engine,
        },
      })
      .json()
      .catch(async (e) => {
        throw (await e.response?.json()) ?? e
      })
      .then()
  }

  override async unloadModel(model: Model): Promise<void> {
    return ky
      .post(`${CORTEX_API_URL}/v1/models/stop`, {
        json: { model: model.id },
      })
      .json()
      .then()
  }

  /**
   * Do health check on cortex.cpp
   * @returns
   */
  healthz(): Promise<void> {
    return ky
      .get(`${CORTEX_API_URL}/healthz`, {
        retry: {
          limit: 10,
          methods: ['get'],
        },
      })
      .then(() => {})
  }

  /**
   * Clean cortex processes
   * @returns
   */
  clean(): Promise<any> {
    return ky
      .delete(`${CORTEX_API_URL}/processmanager/destroy`, {
        timeout: 2000, // maximum 2 seconds
      })
      .catch(() => {
        // Do nothing
      })
  }
}

/// Legacy
export const getModelFilePath = async (
  model: Model,
  file: string
): Promise<string> => {
  // Symlink to the model file
  if (!model.sources[0]?.url.startsWith('http')) {
    return model.sources[0]?.url
  }
  return joinPath([await getJanDataFolderPath(), 'models', model.id, file])
}
///
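For orientation, the request that loadModel above ends up issuing against cortex.cpp looks roughly like the sketch below. The endpoint and the model/engine fields come straight from the code in this file; the concrete model id and load parameters (ctx_len, ngl) are made-up examples, not values taken from this PR.

// Illustrative sketch only - mirrors the shape built in loadModel() above.
// 'mistral-ins-7b-q4', ctx_len and ngl are example values, not from this PR.
import ky from 'ky'

const CORTEX_API_URL = 'http://127.0.0.1:39291' // injected via rollup in the real extension

async function startModelExample(): Promise<unknown> {
  return ky
    .post(`${CORTEX_API_URL}/v1/models/start`, {
      json: {
        model: 'mistral-ins-7b-q4', // example model id
        engine: 'llama-cpp',        // engine name used by the model JSONs in this PR
        ctx_len: 2048,              // example load parameter
        ngl: 33,                    // example load parameter
      },
    })
    .json()
}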
@ -1,5 +1,5 @@
 import { describe, expect, it } from '@jest/globals'
-import { executableNitroFile } from './execute'
+import { executableCortexFile } from './execute'
 import { GpuSetting } from '@janhq/core'
 import { cpuInfo } from 'cpu-instructions'
@ -27,10 +27,10 @@ jest.mock('cpu-instructions', () => ({
     cpuInfo: jest.fn(),
   },
 }))
-let mock = cpuInfo.cpuInfo as jest.Mock
-mock.mockReturnValue([])
+let mockCpuInfo = cpuInfo.cpuInfo as jest.Mock
+mockCpuInfo.mockReturnValue([])

-describe('test executable nitro file', () => {
+describe('test executable cortex file', () => {
   afterAll(function () {
     Object.defineProperty(process, 'platform', {
       value: originalPlatform,
@ -44,10 +44,14 @@ describe('test executable nitro file', () => {
     Object.defineProperty(process, 'arch', {
       value: 'arm64',
     })
-    expect(executableNitroFile(testSettings)).toEqual(
+    expect(executableCortexFile(testSettings)).toEqual(
       expect.objectContaining({
-        enginePath: expect.stringContaining(`mac-arm64`),
-        executablePath: originalPlatform === 'darwin' ? expect.stringContaining(`mac-arm64/cortex-cpp`) : expect.anything(),
+        enginePath: expect.stringContaining(`arm64`),
+        binPath: expect.stringContaining(`bin`),
+        executablePath:
+          originalPlatform === 'darwin'
+            ? expect.stringContaining(`cortex-server`)
+            : expect.anything(),
         cudaVisibleDevices: '',
         vkVisibleDevices: '',
       })
@ -55,10 +59,14 @@ describe('test executable nitro file', () => {
     Object.defineProperty(process, 'arch', {
       value: 'x64',
     })
-    expect(executableNitroFile(testSettings)).toEqual(
+    expect(executableCortexFile(testSettings)).toEqual(
       expect.objectContaining({
-        enginePath: expect.stringContaining(`mac-x64`),
-        executablePath: originalPlatform === 'darwin' ? expect.stringContaining(`mac-x64/cortex-cpp`) : expect.anything(),
+        enginePath: expect.stringContaining(`x64`),
+        binPath: expect.stringContaining(`bin`),
+        executablePath:
+          originalPlatform === 'darwin'
+            ? expect.stringContaining(`cortex-server`)
+            : expect.anything(),
         cudaVisibleDevices: '',
         vkVisibleDevices: '',
       })
@ -73,10 +81,12 @@ describe('test executable nitro file', () => {
       ...testSettings,
       run_mode: 'cpu',
     }
-    expect(executableNitroFile(settings)).toEqual(
+    mockCpuInfo.mockReturnValue(['avx'])
+    expect(executableCortexFile(settings)).toEqual(
       expect.objectContaining({
-        enginePath: expect.stringContaining(`win`),
-        executablePath: expect.stringContaining(`cortex-cpp.exe`),
+        enginePath: expect.stringContaining(`avx`),
+        binPath: expect.stringContaining(`bin`),
+        executablePath: expect.stringContaining(`cortex-server.exe`),
         cudaVisibleDevices: '',
         vkVisibleDevices: '',
       })
@ -107,10 +117,12 @@ describe('test executable nitro file', () => {
       },
     ],
     }
-    expect(executableNitroFile(settings)).toEqual(
+    mockCpuInfo.mockReturnValue(['avx2'])
+    expect(executableCortexFile(settings)).toEqual(
       expect.objectContaining({
-        enginePath: expect.stringContaining(`win-cuda-11-7`),
-        executablePath: expect.stringContaining(`cortex-cpp.exe`),
+        enginePath: expect.stringContaining(`avx2-cuda-11-7`),
+        binPath: expect.stringContaining(`bin`),
+        executablePath: expect.stringContaining(`cortex-server.exe`),
         cudaVisibleDevices: '0',
         vkVisibleDevices: '0',
       })
@ -141,10 +153,12 @@ describe('test executable nitro file', () => {
       },
     ],
     }
-    expect(executableNitroFile(settings)).toEqual(
+    mockCpuInfo.mockReturnValue(['noavx'])
+    expect(executableCortexFile(settings)).toEqual(
       expect.objectContaining({
-        enginePath: expect.stringContaining(`win-cuda-12-0`),
-        executablePath: expect.stringContaining(`cortex-cpp.exe`),
+        enginePath: expect.stringContaining(`noavx-cuda-12-0`),
+        binPath: expect.stringContaining(`bin`),
+        executablePath: expect.stringContaining(`cortex-server.exe`),
         cudaVisibleDevices: '0',
         vkVisibleDevices: '0',
       })
@ -159,10 +173,11 @@ describe('test executable nitro file', () => {
       ...testSettings,
       run_mode: 'cpu',
     }
-    expect(executableNitroFile(settings)).toEqual(
+    mockCpuInfo.mockReturnValue(['noavx'])
+    expect(executableCortexFile(settings)).toEqual(
       expect.objectContaining({
-        enginePath: expect.stringContaining(`linux`),
-        executablePath: expect.stringContaining(`cortex-cpp`),
+        enginePath: expect.stringContaining(`noavx`),
+        executablePath: expect.stringContaining(`cortex-server`),
         cudaVisibleDevices: '',
         vkVisibleDevices: '',
       })
@ -193,10 +208,11 @@ describe('test executable nitro file', () => {
       },
     ],
     }
-    expect(executableNitroFile(settings)).toEqual(
+    expect(executableCortexFile(settings)).toEqual(
       expect.objectContaining({
-        enginePath: expect.stringContaining(`linux-cuda-11-7`),
-        executablePath: expect.stringContaining(`cortex-cpp`),
+        enginePath: expect.stringContaining(`cuda-11-7`),
+        binPath: expect.stringContaining(`bin`),
+        executablePath: expect.stringContaining(`cortex-server`),
         cudaVisibleDevices: '0',
         vkVisibleDevices: '0',
       })
@ -227,10 +243,11 @@ describe('test executable nitro file', () => {
       },
     ],
     }
-    expect(executableNitroFile(settings)).toEqual(
+    expect(executableCortexFile(settings)).toEqual(
      expect.objectContaining({
-        enginePath: expect.stringContaining(`linux-cuda-12-0`),
-        executablePath: expect.stringContaining(`cortex-cpp`),
+        enginePath: expect.stringContaining(`cuda-12-0`),
+        binPath: expect.stringContaining(`bin`),
+        executablePath: expect.stringContaining(`cortex-server`),
         cudaVisibleDevices: '0',
         vkVisibleDevices: '0',
       })
@ -249,12 +266,13 @@ describe('test executable nitro file', () => {
     const cpuInstructions = ['avx512', 'avx2', 'avx', 'noavx']
     cpuInstructions.forEach((instruction) => {
-      mock.mockReturnValue([instruction])
+      mockCpuInfo.mockReturnValue([instruction])

-      expect(executableNitroFile(settings)).toEqual(
+      expect(executableCortexFile(settings)).toEqual(
         expect.objectContaining({
-          enginePath: expect.stringContaining(`linux-${instruction}`),
-          executablePath: expect.stringContaining(`cortex-cpp`),
+          enginePath: expect.stringContaining(instruction),
+          binPath: expect.stringContaining(`bin`),
+          executablePath: expect.stringContaining(`cortex-server`),
           cudaVisibleDevices: '',
           vkVisibleDevices: '',
@ -273,11 +291,12 @@ describe('test executable nitro file', () => {
     }
     const cpuInstructions = ['avx512', 'avx2', 'avx', 'noavx']
     cpuInstructions.forEach((instruction) => {
-      mock.mockReturnValue([instruction])
-      expect(executableNitroFile(settings)).toEqual(
+      mockCpuInfo.mockReturnValue([instruction])
+      expect(executableCortexFile(settings)).toEqual(
         expect.objectContaining({
-          enginePath: expect.stringContaining(`win-${instruction}`),
-          executablePath: expect.stringContaining(`cortex-cpp.exe`),
+          enginePath: expect.stringContaining(instruction),
+          binPath: expect.stringContaining(`bin`),
+          executablePath: expect.stringContaining(`cortex-server.exe`),
           cudaVisibleDevices: '',
           vkVisibleDevices: '',
         })
@ -312,11 +331,12 @@ describe('test executable nitro file', () => {
     }
     const cpuInstructions = ['avx512', 'avx2', 'avx', 'noavx']
     cpuInstructions.forEach((instruction) => {
-      mock.mockReturnValue([instruction])
-      expect(executableNitroFile(settings)).toEqual(
+      mockCpuInfo.mockReturnValue([instruction])
+      expect(executableCortexFile(settings)).toEqual(
         expect.objectContaining({
-          enginePath: expect.stringContaining(`win-cuda-12-0`),
-          executablePath: expect.stringContaining(`cortex-cpp.exe`),
+          enginePath: expect.stringContaining(`cuda-12-0`),
+          binPath: expect.stringContaining(`bin`),
+          executablePath: expect.stringContaining(`cortex-server.exe`),
           cudaVisibleDevices: '0',
           vkVisibleDevices: '0',
         })
@ -351,11 +371,12 @@ describe('test executable nitro file', () => {
       ],
     }
     cpuInstructions.forEach((instruction) => {
-      mock.mockReturnValue([instruction])
-      expect(executableNitroFile(settings)).toEqual(
+      mockCpuInfo.mockReturnValue([instruction])
+      expect(executableCortexFile(settings)).toEqual(
         expect.objectContaining({
-          enginePath: expect.stringContaining(`linux-cuda-12-0`),
-          executablePath: expect.stringContaining(`cortex-cpp`),
+          enginePath: expect.stringContaining(`cuda-12-0`),
+          binPath: expect.stringContaining(`bin`),
+          executablePath: expect.stringContaining(`cortex-server`),
           cudaVisibleDevices: '0',
           vkVisibleDevices: '0',
         })
@ -391,11 +412,12 @@ describe('test executable nitro file', () => {
       ],
     }
     cpuInstructions.forEach((instruction) => {
-      mock.mockReturnValue([instruction])
-      expect(executableNitroFile(settings)).toEqual(
+      mockCpuInfo.mockReturnValue([instruction])
+      expect(executableCortexFile(settings)).toEqual(
         expect.objectContaining({
-          enginePath: expect.stringContaining(`linux-vulkan`),
-          executablePath: expect.stringContaining(`cortex-cpp`),
+          enginePath: expect.stringContaining(`vulkan`),
+          binPath: expect.stringContaining(`bin`),
+          executablePath: expect.stringContaining(`cortex-server`),
           cudaVisibleDevices: '0',
           vkVisibleDevices: '0',
         })
@ -417,11 +439,15 @@ describe('test executable nitro file', () => {
       ...testSettings,
       run_mode: 'cpu',
     }
-    mock.mockReturnValue([])
-    expect(executableNitroFile(settings)).toEqual(
+    mockCpuInfo.mockReturnValue([])
+    expect(executableCortexFile(settings)).toEqual(
       expect.objectContaining({
-        enginePath: expect.stringContaining(`mac-x64`),
-        executablePath: originalPlatform === 'darwin' ? expect.stringContaining(`mac-x64/cortex-cpp`) : expect.anything(),
+        enginePath: expect.stringContaining(`x64`),
+        binPath: expect.stringContaining(`bin`),
+        executablePath:
+          originalPlatform === 'darwin'
+            ? expect.stringContaining(`cortex-server`)
+            : expect.anything(),
         cudaVisibleDevices: '',
         vkVisibleDevices: '',
       })
@ -2,8 +2,9 @@ import { GpuSetting } from '@janhq/core'
 import * as path from 'path'
 import { cpuInfo } from 'cpu-instructions'

-export interface NitroExecutableOptions {
+export interface CortexExecutableOptions {
   enginePath: string
+  binPath: string
   executablePath: string
   cudaVisibleDevices: string
   vkVisibleDevices: string
@ -36,8 +37,8 @@ const os = (): string => {
     ? 'win'
     : process.platform === 'darwin'
       ? process.arch === 'arm64'
-        ? 'mac-arm64'
-        : 'mac-x64'
+        ? 'arm64'
+        : 'x64'
       : 'linux'
 }

@ -66,7 +67,7 @@ const cudaVersion = (settings?: GpuSetting): '11-7' | '12-0' | undefined => {
  * The CPU instructions that will be set - either 'avx512', 'avx2', 'avx', or 'noavx'.
  * @returns
  */
-const cpuInstructions = () => {
+const cpuInstructions = (): string => {
   if (process.platform === 'darwin') return ''
   return cpuInfo.cpuInfo().some((e) => e.toUpperCase() === 'AVX512')
     ? 'avx512'
@ -81,29 +82,32 @@ const cpuInstructions = () => {
  * Find which executable file to run based on the current platform.
  * @returns The name of the executable file to run.
  */
-export const executableNitroFile = (
+export const executableCortexFile = (
   gpuSetting?: GpuSetting
-): NitroExecutableOptions => {
-  let engineFolder = [
-    os(),
-    ...(gpuSetting?.vulkan
-      ? []
-      : [
-          gpuRunMode(gpuSetting) !== 'cuda' ? cpuInstructions() : '',
-          gpuRunMode(gpuSetting),
-          cudaVersion(gpuSetting),
-        ]),
-    gpuSetting?.vulkan ? 'vulkan' : undefined,
-  ]
-    .filter((e) => !!e)
-    .join('-')
+): CortexExecutableOptions => {
+  const cpuInstruction = cpuInstructions()
+  let engineFolder = gpuSetting?.vulkan
+    ? 'vulkan'
+    : process.platform === 'darwin'
+      ? os()
+      : [
+          gpuRunMode(gpuSetting) !== 'cuda' ||
+          cpuInstruction === 'avx2' || cpuInstruction === 'avx512'
+            ? cpuInstruction
+            : 'noavx',
+          gpuRunMode(gpuSetting),
+          cudaVersion(gpuSetting),
+        ]
+          .filter((e) => !!e)
+          .join('-')
   let cudaVisibleDevices = gpuSetting?.gpus_in_use.join(',') ?? ''
   let vkVisibleDevices = gpuSetting?.gpus_in_use.join(',') ?? ''
-  let binaryName = `${process.platform === 'darwin' ? `${os()}/` : ''}cortex-cpp${extension()}`
+  let binaryName = `cortex-server${extension()}`
+  const binPath = path.join(__dirname, '..', 'bin')
   return {
-    enginePath: path.join(__dirname, '..', 'bin', engineFolder),
-    executablePath: path.join(__dirname, '..', 'bin', binaryName),
+    enginePath: path.join(binPath, engineFolder),
+    executablePath: path.join(binPath, binaryName),
+    binPath: binPath,
    cudaVisibleDevices,
    vkVisibleDevices,
  }
 }
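To make the folder-selection logic above concrete, this is roughly the options object executableCortexFile would produce on a Linux machine with an AVX2-capable CPU and one CUDA 12.x GPU in use. The field names match the interface above; the absolute paths are placeholders, not output captured from this PR.

// Assumed example result of executableCortexFile() on Linux + AVX2 + CUDA 12.x, GPU 0 in use.
// Paths are placeholders; the folder name follows the `<cpu-instructions>-cuda-<version>` join above.
const exampleOptions: CortexExecutableOptions = {
  enginePath: '/opt/jan/extensions/inference-cortex-extension/bin/avx2-cuda-12-0',
  binPath: '/opt/jan/extensions/inference-cortex-extension/bin',
  executablePath: '/opt/jan/extensions/inference-cortex-extension/bin/cortex-server',
  cudaVisibleDevices: '0',
  vkVisibleDevices: '0',
}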
94  extensions/inference-cortex-extension/src/node/index.test.ts  Normal file
@ -0,0 +1,94 @@
jest.mock('@janhq/core/node', () => ({
  ...jest.requireActual('@janhq/core/node'),
  getJanDataFolderPath: () => '',
  getSystemResourceInfo: () => {
    return {
      cpu: {
        cores: 1,
        logicalCores: 1,
        threads: 1,
        model: 'model',
        speed: 1,
      },
      memory: {
        total: 1,
        free: 1,
      },
      gpu: {
        model: 'model',
        memory: 1,
        cuda: {
          version: 'version',
          devices: 'devices',
        },
        vulkan: {
          version: 'version',
          devices: 'devices',
        },
      },
    }
  },
}))

jest.mock('fs', () => ({
  default: {
    readdirSync: () => [],
  },
}))

jest.mock('child_process', () => ({
  exec: () => {
    return {
      stdout: { on: jest.fn() },
      stderr: { on: jest.fn() },
      on: jest.fn(),
    }
  },
  spawn: () => {
    return {
      stdout: { on: jest.fn() },
      stderr: { on: jest.fn() },
      on: jest.fn(),
      pid: '111',
    }
  },
}))

jest.mock('./execute', () => ({
  executableCortexFile: () => {
    return {
      enginePath: 'enginePath',
      executablePath: 'executablePath',
      cudaVisibleDevices: 'cudaVisibleDevices',
      vkVisibleDevices: 'vkVisibleDevices',
    }
  },
}))

import index from './index'

describe('dispose', () => {
  it('should dispose a model successfully on Mac', async () => {
    Object.defineProperty(process, 'platform', {
      value: 'darwin',
    })

    // Call the dispose function
    const result = await index.dispose()

    // Assert that the result is as expected
    expect(result).toBeUndefined()
  })

  it('should kill the subprocess successfully on Windows', async () => {
    Object.defineProperty(process, 'platform', {
      value: 'win32',
    })

    // Call the killSubprocess function
    const result = await index.dispose()

    // Assert that the result is as expected
    expect(result).toBeUndefined()
  })
})
103  extensions/inference-cortex-extension/src/node/index.ts  Normal file
@ -0,0 +1,103 @@
import path from 'path'
import { getJanDataFolderPath, log, SystemInformation } from '@janhq/core/node'
import { executableCortexFile } from './execute'
import { ProcessWatchdog } from './watchdog'
import { appResourcePath } from '@janhq/core/node'

// The HOST address to use for the Nitro subprocess
const LOCAL_PORT = '39291'
let watchdog: ProcessWatchdog | undefined = undefined

/**
 * Spawns a Nitro subprocess.
 * @returns A promise that resolves when the Nitro subprocess is started.
 */
function run(systemInfo?: SystemInformation): Promise<any> {
  log(`[CORTEX]:: Spawning cortex subprocess...`)

  return new Promise<void>(async (resolve, reject) => {
    let executableOptions = executableCortexFile(
      // If ngl is not set or equal to 0, run on CPU with correct instructions
      systemInfo?.gpuSetting
        ? {
            ...systemInfo.gpuSetting,
            run_mode: systemInfo.gpuSetting.run_mode,
          }
        : undefined
    )

    // Execute the binary
    log(`[CORTEX]:: Spawn cortex at path: ${executableOptions.executablePath}`)
    log(`[CORTEX]:: Cortex engine path: ${executableOptions.enginePath}`)

    addEnvPaths(path.join(appResourcePath(), 'shared'))
    addEnvPaths(executableOptions.binPath)
    addEnvPaths(executableOptions.enginePath)

    const dataFolderPath = getJanDataFolderPath()
    watchdog = new ProcessWatchdog(
      executableOptions.executablePath,
      [
        '--start-server',
        '--port',
        LOCAL_PORT.toString(),
        '--config_file_path',
        `${path.join(dataFolderPath, '.janrc')}`,
        '--data_folder_path',
        dataFolderPath,
      ],
      {
        cwd: executableOptions.enginePath,
        env: {
          ...process.env,
          ENGINE_PATH: executableOptions.enginePath,
          CUDA_VISIBLE_DEVICES: executableOptions.cudaVisibleDevices,
          // Vulkan - Support 1 device at a time for now
          ...(executableOptions.vkVisibleDevices?.length > 0 && {
            GGML_VULKAN_DEVICE: executableOptions.vkVisibleDevices[0],
          }),
        },
      }
    )
    watchdog.start()
    resolve()
  })
}

/**
 * Every module should have a dispose function
 * This will be called when the extension is unloaded and should clean up any resources
 * Also called when app is closed
 */
function dispose() {
  watchdog?.terminate()
}

function addEnvPaths(dest: string) {
  // Add engine path to the PATH and LD_LIBRARY_PATH
  if (process.platform === 'win32') {
    process.env.PATH = (process.env.PATH || '').concat(
      path.delimiter,
      dest,
    )
    log(`[CORTEX] PATH: ${process.env.PATH}`)
  } else {
    process.env.LD_LIBRARY_PATH = (process.env.LD_LIBRARY_PATH || '').concat(
      path.delimiter,
      dest,
    )
    log(`[CORTEX] LD_LIBRARY_PATH: ${process.env.LD_LIBRARY_PATH}`)
  }
}

/**
 * Cortex process info
 */
export interface CortexProcessInfo {
  isRunning: boolean
}

export default {
  run,
  dispose,
}
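Putting run() together, the watchdog ends up spawning a command line along these lines. The flags are exactly the ones assembled above; the paths are placeholders, not values taken from this PR.

// Placeholder paths; the flags mirror the args array built in run() above.
const exampleCortexCommand = [
  '/opt/jan/extensions/inference-cortex-extension/bin/cortex-server',
  '--start-server',
  '--port', '39291',
  '--config_file_path', '/home/user/jan/.janrc',
  '--data_folder_path', '/home/user/jan',
].join(' ')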
84  extensions/inference-cortex-extension/src/node/watchdog.ts  Normal file
@ -0,0 +1,84 @@
import { log } from '@janhq/core/node'
import { spawn, ChildProcess } from 'child_process'
import { EventEmitter } from 'events'

interface WatchdogOptions {
  cwd?: string
  restartDelay?: number
  maxRestarts?: number
  env?: NodeJS.ProcessEnv
}

export class ProcessWatchdog extends EventEmitter {
  private command: string
  private args: string[]
  private options: WatchdogOptions
  private process: ChildProcess | null
  private restartDelay: number
  private maxRestarts: number
  private restartCount: number
  private isTerminating: boolean

  constructor(command: string, args: string[], options: WatchdogOptions = {}) {
    super()
    this.command = command
    this.args = args
    this.options = options
    this.process = null
    this.restartDelay = options.restartDelay || 5000
    this.maxRestarts = options.maxRestarts || 5
    this.restartCount = 0
    this.isTerminating = false
  }

  start(): void {
    this.spawnProcess()
  }

  private spawnProcess(): void {
    if (this.isTerminating) return

    log(`Starting process: ${this.command} ${this.args.join(' ')}`)
    this.process = spawn(this.command, this.args, this.options)

    this.process.stdout?.on('data', (data: Buffer) => {
      log(`Process output: ${data}`)
      this.emit('output', data.toString())
    })

    this.process.stderr?.on('data', (data: Buffer) => {
      log(`Process error: ${data}`)
      this.emit('error', data.toString())
    })

    this.process.on('close', (code: number | null) => {
      log(`Process exited with code ${code}`)
      this.emit('close', code)
      if (!this.isTerminating) {
        this.restartProcess()
      }
    })
  }

  private restartProcess(): void {
    if (this.restartCount < this.maxRestarts) {
      this.restartCount++
      log(
        `Restarting process in ${this.restartDelay}ms (Attempt ${this.restartCount}/${this.maxRestarts})`
      )
      setTimeout(() => this.spawnProcess(), this.restartDelay)
    } else {
      log('Max restart attempts reached. Exiting watchdog.')
      this.emit('maxRestartsReached')
    }
  }

  terminate(): void {
    this.isTerminating = true
    if (this.process) {
      log('Terminating watched process...')
      this.process.kill()
    }
    this.emit('terminated')
  }
}
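Outside of this extension, ProcessWatchdog can be exercised on its own; a minimal usage sketch, where the binary path, port, and handlers are placeholders of my choosing rather than anything in this PR:

// Minimal sketch; '/usr/local/bin/some-server' stands in for any long-running binary.
import { ProcessWatchdog } from './watchdog'

const dog = new ProcessWatchdog('/usr/local/bin/some-server', ['--port', '8080'], {
  restartDelay: 2000, // retry 2s after an unexpected exit
  maxRestarts: 3,     // then give up and emit 'maxRestartsReached'
})
dog.on('output', (line: string) => console.log(line))
dog.on('maxRestartsReached', () => console.error('server kept crashing'))
dog.start()
// ...on shutdown:
dog.terminate()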
@ -1,9 +1,8 @@
 {
   "compilerOptions": {
     "moduleResolution": "node",
-    "target": "ES2015",
-    "module": "ES2020",
-    "lib": ["es2015", "es2016", "es2017", "dom"],
+    "target": "es2016",
+    "module": "esnext",
     "strict": true,
     "sourceMap": true,
     "declaration": true,
Some files were not shown because too many files have changed in this diff.