675 lines
21 KiB
TypeScript
675 lines
21 KiB
TypeScript
/**
|
|
* Default Models Service - Web implementation
|
|
*/
|
|
|
|
import { sanitizeModelId } from '@/lib/utils'
|
|
import {
|
|
AIEngine,
|
|
EngineManager,
|
|
SessionInfo,
|
|
SettingComponentProps,
|
|
modelInfo,
|
|
ThreadMessage,
|
|
ContentType,
|
|
} from '@janhq/core'
|
|
import { Model as CoreModel } from '@janhq/core'
|
|
import type {
|
|
ModelsService,
|
|
ModelCatalog,
|
|
HuggingFaceRepo,
|
|
CatalogModel,
|
|
ModelValidationResult,
|
|
ModelPlan,
|
|
} from './types'
|
|
|
|
// TODO: Replace this with the actual provider later
|
|
const defaultProvider = 'llamacpp'
|
|
|
|
export class DefaultModelsService implements ModelsService {
|
|
private getEngine(provider: string = defaultProvider) {
|
|
return EngineManager.instance().get(provider) as AIEngine | undefined
|
|
}
|
|
|
|
async getModel(modelId: string): Promise<modelInfo | undefined> {
|
|
return this.getEngine()?.get(modelId)
|
|
}
|
|
|
|
async fetchModels(): Promise<modelInfo[]> {
|
|
return this.getEngine()?.list() ?? []
|
|
}
|
|
|
|
async fetchModelCatalog(): Promise<ModelCatalog> {
|
|
try {
|
|
const response = await fetch(MODEL_CATALOG_URL)
|
|
|
|
if (!response.ok) {
|
|
throw new Error(
|
|
`Failed to fetch model catalog: ${response.status} ${response.statusText}`
|
|
)
|
|
}
|
|
|
|
const catalog: ModelCatalog = await response.json()
|
|
return catalog
|
|
} catch (error) {
|
|
console.error('Error fetching model catalog:', error)
|
|
throw new Error(
|
|
`Failed to fetch model catalog: ${error instanceof Error ? error.message : 'Unknown error'}`
|
|
)
|
|
}
|
|
}
|
|
|
|
async fetchHuggingFaceRepo(
|
|
repoId: string,
|
|
hfToken?: string
|
|
): Promise<HuggingFaceRepo | null> {
|
|
try {
|
|
// Clean the repo ID to handle various input formats
|
|
const cleanRepoId = repoId
|
|
.replace(/^https?:\/\/huggingface\.co\//, '')
|
|
.replace(/^huggingface\.co\//, '')
|
|
.replace(/\/$/, '') // Remove trailing slash
|
|
.trim()
|
|
|
|
if (!cleanRepoId || !cleanRepoId.includes('/')) {
|
|
return null
|
|
}
|
|
|
|
const response = await fetch(
|
|
`https://huggingface.co/api/models/${cleanRepoId}?blobs=true&files_metadata=true`,
|
|
{
|
|
headers: hfToken
|
|
? {
|
|
Authorization: `Bearer ${hfToken}`,
|
|
}
|
|
: {},
|
|
}
|
|
)
|
|
|
|
if (!response.ok) {
|
|
if (response.status === 404) {
|
|
return null // Repository not found
|
|
}
|
|
throw new Error(
|
|
`Failed to fetch HuggingFace repository: ${response.status} ${response.statusText}`
|
|
)
|
|
}
|
|
|
|
const repoData = await response.json()
|
|
return repoData
|
|
} catch (error) {
|
|
console.error('Error fetching HuggingFace repository:', error)
|
|
return null
|
|
}
|
|
}
|
|
|
|
convertHfRepoToCatalogModel(repo: HuggingFaceRepo): CatalogModel {
|
|
// Format file size helper
|
|
const formatFileSize = (size?: number) => {
|
|
if (!size) return 'Unknown size'
|
|
if (size < 1024 ** 3) return `${(size / 1024 ** 2).toFixed(1)} MB`
|
|
return `${(size / 1024 ** 3).toFixed(1)} GB`
|
|
}
|
|
|
|
// Extract GGUF files from the repository siblings
|
|
const ggufFiles =
|
|
repo.siblings?.filter((file) =>
|
|
file.rfilename.toLowerCase().endsWith('.gguf')
|
|
) || []
|
|
|
|
// Separate regular GGUF files from mmproj files
|
|
const regularGgufFiles = ggufFiles.filter(
|
|
(file) => !file.rfilename.toLowerCase().includes('mmproj')
|
|
)
|
|
|
|
const mmprojFiles = ggufFiles.filter((file) =>
|
|
file.rfilename.toLowerCase().includes('mmproj')
|
|
)
|
|
|
|
// Convert regular GGUF files to quants format
|
|
const quants = regularGgufFiles.map((file) => {
|
|
// Generate model_id from filename (remove .gguf extension, case-insensitive)
|
|
const modelId = file.rfilename.replace(/\.gguf$/i, '')
|
|
|
|
return {
|
|
model_id: `${repo.author}/${sanitizeModelId(modelId)}`,
|
|
path: `https://huggingface.co/${repo.modelId}/resolve/main/${file.rfilename}`,
|
|
file_size: formatFileSize(file.size),
|
|
}
|
|
})
|
|
|
|
// Convert mmproj files to mmproj_models format
|
|
const mmprojModels = mmprojFiles.map((file) => {
|
|
const modelId = file.rfilename.replace(/\.gguf$/i, '')
|
|
|
|
return {
|
|
model_id: sanitizeModelId(modelId),
|
|
path: `https://huggingface.co/${repo.modelId}/resolve/main/${file.rfilename}`,
|
|
file_size: formatFileSize(file.size),
|
|
}
|
|
})
|
|
|
|
return {
|
|
model_name: repo.modelId,
|
|
developer: repo.author,
|
|
downloads: repo.downloads || 0,
|
|
created_at: repo.createdAt,
|
|
num_quants: quants.length,
|
|
quants: quants,
|
|
num_mmproj: mmprojModels.length,
|
|
mmproj_models: mmprojModels,
|
|
readme: `https://huggingface.co/${repo.modelId}/resolve/main/README.md`,
|
|
description: `**Tags**: ${repo.tags?.join(', ')}`,
|
|
}
|
|
}
|
|
|
|
async updateModel(modelId: string, model: Partial<CoreModel>): Promise<void> {
|
|
if (model.settings) {
|
|
this.getEngine()?.updateSettings(
|
|
model.settings as SettingComponentProps[]
|
|
)
|
|
}
|
|
// Note: Model name/ID updates are handled at the provider level in the frontend
|
|
// The engine doesn't have an update method for model metadata
|
|
console.log('Model update request processed for modelId:', modelId)
|
|
}
|
|
|
|
async pullModel(
|
|
id: string,
|
|
modelPath: string,
|
|
modelSha256?: string,
|
|
modelSize?: number,
|
|
mmprojPath?: string,
|
|
mmprojSha256?: string,
|
|
mmprojSize?: number
|
|
): Promise<void> {
|
|
return this.getEngine()?.import(id, {
|
|
modelPath,
|
|
mmprojPath,
|
|
modelSha256,
|
|
modelSize,
|
|
mmprojSha256,
|
|
mmprojSize,
|
|
})
|
|
}
|
|
|
|
async pullModelWithMetadata(
|
|
id: string,
|
|
modelPath: string,
|
|
mmprojPath?: string,
|
|
hfToken?: string
|
|
): Promise<void> {
|
|
let modelSha256: string | undefined
|
|
let modelSize: number | undefined
|
|
let mmprojSha256: string | undefined
|
|
let mmprojSize: number | undefined
|
|
|
|
// Extract repo ID from model URL
|
|
// URL format: https://huggingface.co/{repo}/resolve/main/{filename}
|
|
const modelUrlMatch = modelPath.match(
|
|
/https:\/\/huggingface\.co\/([^/]+\/[^/]+)\/resolve\/main\/(.+)/
|
|
)
|
|
|
|
if (modelUrlMatch) {
|
|
const [, repoId, modelFilename] = modelUrlMatch
|
|
|
|
try {
|
|
// Fetch real-time metadata from HuggingFace
|
|
const repoInfo = await this.fetchHuggingFaceRepo(repoId, hfToken)
|
|
|
|
if (repoInfo?.siblings) {
|
|
// Find the specific model file
|
|
const modelFile = repoInfo.siblings.find(
|
|
(file) => file.rfilename === modelFilename
|
|
)
|
|
if (modelFile?.lfs) {
|
|
modelSha256 = modelFile.lfs.sha256
|
|
modelSize = modelFile.lfs.size
|
|
}
|
|
|
|
// If mmproj path provided, extract its metadata too
|
|
if (mmprojPath) {
|
|
const mmprojUrlMatch = mmprojPath.match(
|
|
/https:\/\/huggingface\.co\/[^/]+\/[^/]+\/resolve\/main\/(.+)/
|
|
)
|
|
if (mmprojUrlMatch) {
|
|
const [, mmprojFilename] = mmprojUrlMatch
|
|
const mmprojFile = repoInfo.siblings.find(
|
|
(file) => file.rfilename === mmprojFilename
|
|
)
|
|
if (mmprojFile?.lfs) {
|
|
mmprojSha256 = mmprojFile.lfs.sha256
|
|
mmprojSize = mmprojFile.lfs.size
|
|
}
|
|
}
|
|
}
|
|
}
|
|
} catch (error) {
|
|
console.warn(
|
|
'Failed to fetch HuggingFace metadata, proceeding without hash verification:',
|
|
error
|
|
)
|
|
// Continue with download even if metadata fetch fails
|
|
}
|
|
}
|
|
|
|
// Call the original pullModel with the fetched metadata
|
|
return this.pullModel(
|
|
id,
|
|
modelPath,
|
|
modelSha256,
|
|
modelSize,
|
|
mmprojPath,
|
|
mmprojSha256,
|
|
mmprojSize
|
|
)
|
|
}
|
|
|
|
async abortDownload(id: string): Promise<void> {
|
|
return this.getEngine()?.abortImport(id)
|
|
}
|
|
|
|
async deleteModel(id: string): Promise<void> {
|
|
return this.getEngine()?.delete(id)
|
|
}
|
|
|
|
async getActiveModels(provider?: string): Promise<string[]> {
|
|
return this.getEngine(provider)?.getLoadedModels() ?? []
|
|
}
|
|
|
|
async stopModel(model: string, provider?: string): Promise<void> {
|
|
this.getEngine(provider)?.unload(model)
|
|
}
|
|
|
|
async stopAllModels(): Promise<void> {
|
|
const models = await this.getActiveModels()
|
|
if (models) await Promise.all(models.map((model) => this.stopModel(model)))
|
|
}
|
|
|
|
async startModel(
|
|
provider: ProviderObject,
|
|
model: string
|
|
): Promise<SessionInfo | undefined> {
|
|
const engine = this.getEngine(provider.provider)
|
|
if (!engine) return undefined
|
|
|
|
const loadedModels = await engine.getLoadedModels()
|
|
if (loadedModels.includes(model)) return undefined
|
|
|
|
// Find the model configuration to get settings
|
|
const modelConfig = provider.models.find((m) => m.id === model)
|
|
|
|
// Key mapping function to transform setting keys
|
|
const mapSettingKey = (key: string): string => {
|
|
const keyMappings: Record<string, string> = {
|
|
ctx_len: 'ctx_size',
|
|
ngl: 'n_gpu_layers',
|
|
}
|
|
return keyMappings[key] || key
|
|
}
|
|
|
|
const settings = modelConfig?.settings
|
|
? Object.fromEntries(
|
|
Object.entries(modelConfig.settings).map(([key, value]) => [
|
|
mapSettingKey(key),
|
|
value.controller_props?.value,
|
|
])
|
|
)
|
|
: undefined
|
|
|
|
return engine.load(model, settings).catch((error) => {
|
|
console.error(
|
|
`Failed to start model ${model} for provider ${provider.provider}:`,
|
|
error
|
|
)
|
|
throw error
|
|
})
|
|
}
|
|
|
|
async isToolSupported(modelId: string): Promise<boolean> {
|
|
const engine = this.getEngine()
|
|
if (!engine) return false
|
|
|
|
return engine.isToolSupported(modelId)
|
|
}
|
|
|
|
async checkMmprojExistsAndUpdateOffloadMMprojSetting(
|
|
modelId: string,
|
|
updateProvider?: (
|
|
providerName: string,
|
|
data: Partial<ModelProvider>
|
|
) => void,
|
|
getProviderByName?: (providerName: string) => ModelProvider | undefined
|
|
): Promise<{ exists: boolean; settingsUpdated: boolean }> {
|
|
let settingsUpdated = false
|
|
|
|
try {
|
|
const engine = this.getEngine('llamacpp') as AIEngine & {
|
|
checkMmprojExists?: (id: string) => Promise<boolean>
|
|
}
|
|
if (engine && typeof engine.checkMmprojExists === 'function') {
|
|
const exists = await engine.checkMmprojExists(modelId)
|
|
|
|
// If we have the store functions, use them; otherwise fall back to localStorage
|
|
if (updateProvider && getProviderByName) {
|
|
const provider = getProviderByName('llamacpp')
|
|
if (provider) {
|
|
const model = provider.models.find((m) => m.id === modelId)
|
|
|
|
if (model?.settings) {
|
|
const hasOffloadMmproj = 'offload_mmproj' in model.settings
|
|
|
|
// If mmproj exists, add offload_mmproj setting (only if it doesn't exist)
|
|
if (exists && !hasOffloadMmproj) {
|
|
// Create updated models array with the new setting
|
|
const updatedModels = provider.models.map((m) => {
|
|
if (m.id === modelId) {
|
|
return {
|
|
...m,
|
|
settings: {
|
|
...m.settings,
|
|
offload_mmproj: {
|
|
key: 'offload_mmproj',
|
|
title: 'Offload MMProj',
|
|
description:
|
|
'Offload multimodal projection model to GPU',
|
|
controller_type: 'checkbox',
|
|
controller_props: {
|
|
value: true,
|
|
},
|
|
},
|
|
},
|
|
}
|
|
}
|
|
return m
|
|
})
|
|
|
|
// Update the provider with the new models array
|
|
updateProvider('llamacpp', { models: updatedModels })
|
|
settingsUpdated = true
|
|
}
|
|
}
|
|
}
|
|
} else {
|
|
// Fall back to localStorage approach for backwards compatibility
|
|
try {
|
|
const modelProviderData = JSON.parse(
|
|
localStorage.getItem('model-provider') || '{}'
|
|
)
|
|
const llamacppProvider = modelProviderData.state?.providers?.find(
|
|
(p: { provider: string }) => p.provider === 'llamacpp'
|
|
)
|
|
const model = llamacppProvider?.models?.find(
|
|
(m: { id: string; settings?: Record<string, unknown> }) =>
|
|
m.id === modelId
|
|
)
|
|
|
|
if (model?.settings) {
|
|
// If mmproj exists, add offload_mmproj setting (only if it doesn't exist)
|
|
if (exists) {
|
|
if (!model.settings.offload_mmproj) {
|
|
model.settings.offload_mmproj = {
|
|
key: 'offload_mmproj',
|
|
title: 'Offload MMProj',
|
|
description: 'Offload multimodal projection layers to GPU',
|
|
controller_type: 'checkbox',
|
|
controller_props: {
|
|
value: true,
|
|
},
|
|
}
|
|
// Save updated settings back to localStorage
|
|
localStorage.setItem(
|
|
'model-provider',
|
|
JSON.stringify(modelProviderData)
|
|
)
|
|
settingsUpdated = true
|
|
}
|
|
}
|
|
}
|
|
} catch (localStorageError) {
|
|
console.error(
|
|
`Error checking localStorage for model ${modelId}:`,
|
|
localStorageError
|
|
)
|
|
}
|
|
}
|
|
|
|
return { exists, settingsUpdated }
|
|
}
|
|
} catch (error) {
|
|
console.error(`Error checking mmproj for model ${modelId}:`, error)
|
|
}
|
|
return { exists: false, settingsUpdated }
|
|
}
|
|
|
|
async checkMmprojExists(modelId: string): Promise<boolean> {
|
|
try {
|
|
const engine = this.getEngine('llamacpp') as AIEngine & {
|
|
checkMmprojExists?: (id: string) => Promise<boolean>
|
|
}
|
|
if (engine && typeof engine.checkMmprojExists === 'function') {
|
|
return await engine.checkMmprojExists(modelId)
|
|
}
|
|
} catch (error) {
|
|
console.error(`Error checking mmproj for model ${modelId}:`, error)
|
|
}
|
|
return false
|
|
}
|
|
|
|
async isModelSupported(
|
|
modelPath: string,
|
|
ctxSize?: number
|
|
): Promise<'RED' | 'YELLOW' | 'GREEN' | 'GREY'> {
|
|
try {
|
|
const engine = this.getEngine('llamacpp') as AIEngine & {
|
|
isModelSupported?: (
|
|
path: string,
|
|
ctx_size?: number
|
|
) => Promise<'RED' | 'YELLOW' | 'GREEN'>
|
|
}
|
|
if (engine && typeof engine.isModelSupported === 'function') {
|
|
return await engine.isModelSupported(modelPath, ctxSize)
|
|
}
|
|
// Fallback if method is not available
|
|
console.warn('isModelSupported method not available in llamacpp engine')
|
|
return 'YELLOW' // Conservative fallback
|
|
} catch (error) {
|
|
console.error(`Error checking model support for ${modelPath}:`, error)
|
|
return 'GREY' // Error state, assume not supported
|
|
}
|
|
}
|
|
|
|
async validateGgufFile(filePath: string): Promise<ModelValidationResult> {
|
|
try {
|
|
const engine = this.getEngine('llamacpp') as AIEngine & {
|
|
validateGgufFile?: (path: string) => Promise<ModelValidationResult>
|
|
}
|
|
|
|
if (engine && typeof engine.validateGgufFile === 'function') {
|
|
return await engine.validateGgufFile(filePath)
|
|
}
|
|
|
|
// If the specific method isn't available, we can fallback to a basic check
|
|
console.warn('validateGgufFile method not available in llamacpp engine')
|
|
return {
|
|
isValid: true, // Assume valid for now
|
|
error: 'Validation method not available',
|
|
}
|
|
} catch (error) {
|
|
console.error(`Error validating GGUF file ${filePath}:`, error)
|
|
return {
|
|
isValid: false,
|
|
error: error instanceof Error ? error.message : 'Unknown error',
|
|
}
|
|
}
|
|
}
|
|
|
|
async planModelLoad(
|
|
modelPath: string,
|
|
mmprojPath?: string,
|
|
requestedCtx?: number
|
|
): Promise<ModelPlan> {
|
|
try {
|
|
const engine = this.getEngine('llamacpp') as AIEngine & {
|
|
planModelLoad?: (
|
|
path: string,
|
|
mmprojPath?: string,
|
|
requestedCtx?: number
|
|
) => Promise<ModelPlan>
|
|
}
|
|
|
|
if (engine && typeof engine.planModelLoad === 'function') {
|
|
// Get the full absolute path to the model file
|
|
const janDataFolderPath = await import('@janhq/core').then((core) =>
|
|
core.getJanDataFolderPath()
|
|
)
|
|
const joinPath = await import('@janhq/core').then(
|
|
(core) => core.joinPath
|
|
)
|
|
const fullModelPath = await joinPath([janDataFolderPath, modelPath])
|
|
// mmprojPath is currently unused, but included for compatibility
|
|
return await engine.planModelLoad(
|
|
fullModelPath,
|
|
mmprojPath,
|
|
requestedCtx
|
|
)
|
|
}
|
|
|
|
// Fallback if method is not available
|
|
console.warn('planModelLoad method not available in llamacpp engine')
|
|
return {
|
|
gpuLayers: 100,
|
|
maxContextLength: 2048,
|
|
noOffloadKVCache: false,
|
|
offloadMmproj: false,
|
|
batchSize: 2048,
|
|
mode: 'Unsupported',
|
|
}
|
|
} catch (error) {
|
|
console.error(`Error planning model load for path ${modelPath}:`, error)
|
|
return {
|
|
gpuLayers: 100,
|
|
maxContextLength: 2048,
|
|
noOffloadKVCache: false,
|
|
offloadMmproj: false,
|
|
batchSize: 2048,
|
|
mode: 'Unsupported',
|
|
}
|
|
}
|
|
}
|
|
|
|
async getTokensCount(
|
|
modelId: string,
|
|
messages: ThreadMessage[]
|
|
): Promise<number> {
|
|
try {
|
|
const engine = this.getEngine('llamacpp') as AIEngine & {
|
|
getTokensCount?: (opts: {
|
|
model: string
|
|
messages: Array<{
|
|
role: string
|
|
content:
|
|
| string
|
|
| Array<{
|
|
type: string
|
|
text?: string
|
|
image_url?: {
|
|
detail?: string
|
|
url?: string
|
|
}
|
|
}>
|
|
}>
|
|
chat_template_kwargs?: {
|
|
enable_thinking: boolean
|
|
}
|
|
}) => Promise<number>
|
|
}
|
|
|
|
if (engine && typeof engine.getTokensCount === 'function') {
|
|
// Transform Jan's ThreadMessage format to OpenAI chat completion format
|
|
const transformedMessages = messages
|
|
.map((message) => {
|
|
// Handle different content types
|
|
let content:
|
|
| string
|
|
| Array<{
|
|
type: string
|
|
text?: string
|
|
image_url?: {
|
|
detail?: string
|
|
url?: string
|
|
}
|
|
}> = ''
|
|
|
|
if (message.content && message.content.length > 0) {
|
|
// Check if there are any image_url content types
|
|
const hasImages = message.content.some(
|
|
(content) => content.type === ContentType.Image
|
|
)
|
|
|
|
if (hasImages) {
|
|
// For multimodal messages, preserve the array structure
|
|
content = message.content.map((contentItem) => {
|
|
if (contentItem.type === ContentType.Text) {
|
|
return {
|
|
type: 'text',
|
|
text: contentItem.text?.value || '',
|
|
}
|
|
} else if (contentItem.type === ContentType.Image) {
|
|
return {
|
|
type: 'image_url',
|
|
image_url: {
|
|
detail: contentItem.image_url?.detail,
|
|
url: contentItem.image_url?.url || '',
|
|
},
|
|
}
|
|
}
|
|
// Fallback for unknown content types
|
|
return {
|
|
type: contentItem.type,
|
|
text: contentItem.text?.value,
|
|
image_url: contentItem.image_url,
|
|
}
|
|
})
|
|
} else {
|
|
// For text-only messages, keep the string format
|
|
const textContents = message.content
|
|
.filter(
|
|
(content) =>
|
|
content.type === ContentType.Text && content.text?.value
|
|
)
|
|
.map((content) => content.text?.value || '')
|
|
|
|
content = textContents.join(' ')
|
|
}
|
|
}
|
|
|
|
return {
|
|
role: message.role,
|
|
content,
|
|
}
|
|
})
|
|
.filter((msg) =>
|
|
typeof msg.content === 'string'
|
|
? msg.content.trim() !== ''
|
|
: Array.isArray(msg.content) && msg.content.length > 0
|
|
) // Filter out empty messages
|
|
|
|
return await engine.getTokensCount({
|
|
model: modelId,
|
|
messages: transformedMessages,
|
|
chat_template_kwargs: {
|
|
enable_thinking: false,
|
|
},
|
|
})
|
|
}
|
|
|
|
// Fallback if method is not available
|
|
console.warn('getTokensCount method not available in llamacpp engine')
|
|
return 0
|
|
} catch (error) {
|
|
console.error(`Error getting tokens count for model ${modelId}:`, error)
|
|
return 0
|
|
}
|
|
}
|
|
}
|