import { BaseExtension } from '../../extension'
import { EngineManager } from './EngineManager'

/* AIEngine class types */

export interface chatCompletionRequestMessage {
  role: 'system' | 'user' | 'assistant' | 'tool'
  content: string | null | Content[] // Content can be a plain string OR an array of content parts
  name?: string
  tool_calls?: any[] // Simplified
  tool_call_id?: string
}

export interface Content {
  type: 'text' | 'input_image' | 'input_audio'
  text?: string
  image_url?: string
  input_audio?: InputAudio
}

export interface InputAudio {
  data: string // Base64-encoded audio data
  format: 'mp3' | 'wav' | 'ogg' | 'flac' // Add more formats as needed; llama-server seems to support mp3
}

export interface chatCompletionRequest {
  model: string // Model ID, though for local engines it may be implicit via SessionInfo
  messages: chatCompletionRequestMessage[]

  // Core sampling parameters
  temperature?: number | null
  dynatemp_range?: number | null
  dynatemp_exponent?: number | null
  top_k?: number | null
  top_p?: number | null
  min_p?: number | null
  typical_p?: number | null
  repeat_penalty?: number | null
  repeat_last_n?: number | null
  presence_penalty?: number | null
  frequency_penalty?: number | null
  dry_multiplier?: number | null
  dry_base?: number | null
  dry_allowed_length?: number | null
  dry_penalty_last_n?: number | null
  dry_sequence_breakers?: string[] | null
  xtc_probability?: number | null
  xtc_threshold?: number | null
  mirostat?: number | null // 0 = disabled, 1 = Mirostat, 2 = Mirostat 2.0
  mirostat_tau?: number | null
  mirostat_eta?: number | null
  n_predict?: number | null
  n_indent?: number | null
  n_keep?: number | null
  stream?: boolean | null
  stop?: string | string[] | null
  seed?: number | null // RNG seed

  // Advanced sampling
  logit_bias?: { [key: string]: number } | null
  n_probs?: number | null
  min_keep?: number | null
  t_max_predict_ms?: number | null
  image_data?: Array<{ data: string; id: number }> | null

  // Internal/optimization parameters
  id_slot?: number | null
  cache_prompt?: boolean | null
  return_tokens?: boolean | null
  samplers?: string[] | null
  timings_per_token?: boolean | null
  post_sampling_probs?: boolean | null
}

export interface chatCompletionChunkChoiceDelta {
  content?: string | null
  role?: 'system' | 'user' | 'assistant' | 'tool'
  tool_calls?: any[] // Simplified
}

export interface chatCompletionChunkChoice {
  index: number
  delta: chatCompletionChunkChoiceDelta
  finish_reason?: 'stop' | 'length' | 'tool_calls' | 'content_filter' | 'function_call' | null
}

export interface chatCompletionChunk {
  id: string
  object: 'chat.completion.chunk'
  created: number
  model: string
  choices: chatCompletionChunkChoice[]
  system_fingerprint?: string
}

export interface chatCompletionChoice {
  index: number
  message: chatCompletionRequestMessage // Response message
  finish_reason: 'stop' | 'length' | 'tool_calls' | 'content_filter' | 'function_call'
  logprobs?: any // Simplified
}

export interface chatCompletion {
  id: string
  object: 'chat.completion'
  created: number
  model: string // Model ID used
  choices: chatCompletionChoice[]
  usage?: {
    prompt_tokens: number
    completion_tokens: number
    total_tokens: number
  }
  system_fingerprint?: string
}

// --- End OpenAI types ---
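
// Illustrative sketch only: how the request types above compose. The model id,
// prompt text, and image URL below are made-up example values, not defaults
// used anywhere in this module.
/*
const exampleRequest: chatCompletionRequest = {
  model: 'qwen3-4B',
  messages: [
    { role: 'system', content: 'You are a helpful assistant.' },
    {
      role: 'user',
      // content may be a plain string or an array of Content parts
      content: [
        { type: 'text', text: 'Describe this image.' },
        { type: 'input_image', image_url: 'https://example.com/cat.png' },
      ],
    },
  ],
  temperature: 0.7,
  top_p: 0.95,
  stream: true, // when true, the reply arrives as chatCompletionChunk deltas
}
*/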
"llama.cpp" port: number sizeBytes: number tags?: string[] path?: string // Absolute path to the model file, if applicable // Additional provider-specific metadata can be added here [key: string]: any } // 1. /list export type listResult = modelInfo[] export interface SessionInfo { pid: string // opaque handle for unload/chat port: number // llama-server output port (corrected from portid) model_id: string, //name of the model model_path: string // path of the loaded model api_key: string } export interface UnloadResult { success: boolean error?: string } // 5. /chat export interface chatOptions { providerId: string sessionId: string /** Full OpenAI ChatCompletionRequest payload */ payload: chatCompletionRequest } // Output for /chat will be Promise for non-streaming // or Promise> for streaming // 7. /import export interface ImportOptions { modelPath: string mmprojPath?: string } export interface importResult { success: boolean modelInfo?: modelInfo error?: string } /** * Base AIEngine * Applicable to all AI Engines */ export abstract class AIEngine extends BaseExtension { // The inference engine ID, implementing the readonly providerId from interface abstract readonly provider: string /** * On extension load, subscribe to events. */ override onLoad() { this.registerEngine() } /** * Registers AI Engines */ registerEngine() { EngineManager.instance().register(this) } /** * Lists available models */ abstract list(): Promise /** * Loads a model into memory */ abstract load(modelId: string): Promise /** * Unloads a model from memory */ abstract unload(sessionId: string): Promise /** * Sends a chat request to the model */ abstract chat( opts: chatCompletionRequest ): Promise> /** * Deletes a model */ abstract delete(modelId: string): Promise /** * Imports a model */ abstract import(modelId: string, opts: ImportOptions): Promise /** * Aborts an ongoing model import */ abstract abortImport(modelId: string): Promise /** * Get currently loaded models */ abstract getLoadedModels(): Promise /** * Optional method to get the underlying chat client */ getChatClient?(sessionId: string): any }