2025-10-09 03:48:51 +07:00

108 lines
4.5 KiB
TypeScript

import { VectorDBExtension, type SearchMode, type VectorDBStatus, type VectorChunkInput, type VectorSearchResult, type AttachmentFileInfo, type VectorDBFileInput, type VectorDBIngestOptions, AIEngine } from '@janhq/core'
import * as vecdb from '@janhq/tauri-plugin-vector-db-api'
import * as ragApi from '@janhq/tauri-plugin-rag-api'
/**
 * Vector DB extension that keeps attachment embeddings per thread.
 *
 * Each thread id is mapped to its own collection name
 * (`attachments_<threadId>`); the public methods translate the thread id and
 * delegate to the `@janhq/tauri-plugin-vector-db-api` bindings.
 */
export default class VectorDBExt extends VectorDBExtension {
  /** Lifecycle hook; this extension has nothing to initialise. */
  async onLoad(): Promise<void> {
    // no-op
  }

  /** Lifecycle hook; this extension has nothing to tear down. */
  onUnload(): void {}

  /** Returns the status reported by the vector DB plugin. */
  async getStatus(): Promise<VectorDBStatus> {
    return await vecdb.getStatus() as VectorDBStatus
  }

  /** Builds the collection name used for a thread's attachments. */
  private collectionForThread(threadId: string): string {
    return `attachments_${threadId}`
  }

  /**
   * Creates the thread's collection.
   *
   * @param threadId - Thread whose collection is created.
   * @param dimension - Embedding vector length, forwarded to the plugin.
   */
  async createCollection(threadId: string, dimension: number): Promise<void> {
    return await vecdb.createCollection(this.collectionForThread(threadId), dimension)
  }

  /**
   * Inserts chunks belonging to `fileId` into the thread's collection.
   *
   * @param threadId - Thread whose collection receives the chunks.
   * @param fileId - Id of the file the chunks belong to.
   * @param chunks - Chunk payloads, forwarded to the plugin unchanged.
   */
  async insertChunks(threadId: string, fileId: string, chunks: VectorChunkInput[]): Promise<void> {
    return await vecdb.insertChunks(this.collectionForThread(threadId), fileId, chunks)
  }

  /**
   * Searches the thread's collection.
   *
   * All arguments after `threadId` are forwarded to the plugin as given.
   *
   * @param threadId - Thread whose collection is searched.
   * @param query_embedding - Embedding of the query.
   * @param limit - Forwarded result limit.
   * @param threshold - Forwarded score threshold.
   * @param mode - Optional search mode.
   * @param fileIds - Optional list of file ids, forwarded to the plugin.
   */
  async searchCollection(
    threadId: string,
    query_embedding: number[],
    limit: number,
    threshold: number,
    mode?: SearchMode,
    fileIds?: string[]
  ): Promise<VectorSearchResult[]> {
    const collection = this.collectionForThread(threadId)
    return await vecdb.searchCollection(collection, query_embedding, limit, threshold, mode, fileIds) as VectorSearchResult[]
  }

  /** Deletes the chunks with the given ids from the thread's collection. */
  async deleteChunks(threadId: string, ids: string[]): Promise<void> {
    return await vecdb.deleteChunks(this.collectionForThread(threadId), ids)
  }

  /** Deletes the thread's whole collection. */
  async deleteCollection(threadId: string): Promise<void> {
    return await vecdb.deleteCollection(this.collectionForThread(threadId))
  }

  /** Optional helper for chunking: splits `text` via the vector DB plugin. */
  private async chunkText(text: string, chunkSize: number, chunkOverlap: number): Promise<string[]> {
    return await vecdb.chunkText(text, chunkSize, chunkOverlap)
  }

  /**
   * Embeds `texts` with the llamacpp extension.
   *
   * The result is positioned by the `index` field of each returned item, so a
   * slot stays unfilled when the engine returns no item for that index.
   * Callers must validate the slots they use.
   *
   * @throws Error when the llamacpp extension or its `embed` method is missing.
   */
  private async embedTexts(texts: string[]): Promise<number[][]> {
    type EmbeddingItem = { embedding: number[]; index: number }
    type EmbeddingEngine = AIEngine & { embed?: (texts: string[]) => Promise<{ data: Array<EmbeddingItem> }> }

    const engine = window.core?.extensionManager.getByName('@janhq/llamacpp-extension') as EmbeddingEngine
    if (!engine?.embed) throw new Error('llamacpp extension not available')

    const response = await engine.embed(texts)
    const items: EmbeddingItem[] = response?.data ?? []
    const vectors: number[][] = new Array(texts.length)
    for (const { index, embedding } of items) {
      vectors[index] = embedding
    }
    return vectors
  }

  /**
   * Parses, chunks, embeds and stores a file for the given thread.
   *
   * Steps: reject a file whose name and path are already attached, parse the
   * document, chunk the text, embed the chunks, validate the embeddings,
   * create the collection and the file record, then insert the chunks. When
   * the document yields no chunks only the file record is created.
   *
   * If inserting the chunks fails, the freshly created file record is removed
   * again (best effort) so that the duplicate check does not block a retry.
   *
   * @param threadId - Thread the file is attached to.
   * @param file - File descriptor; `path`, `name` and `type` are read here.
   * @param opts - Ingest options; `chunkSize` and `chunkOverlap` are read here.
   * @returns The attachment info as listed after the insert, or the created
   *   file record with `chunk_count` filled in when it is not in the listing.
   * @throws Error when the file is already attached, when no embedding
   *   dimension can be determined, or when any chunk lacks an embedding of
   *   that dimension.
   */
  async ingestFile(threadId: string, file: VectorDBFileInput, opts: VectorDBIngestOptions): Promise<AttachmentFileInfo> {
    const collection = this.collectionForThread(threadId)

    // Check for duplicate file (same name + path). A failing listing is
    // treated as "no attachments" rather than aborting the ingest.
    const existingFiles = await vecdb.listAttachments(collection).catch(() => [])
    const duplicate = existingFiles.find((f: any) => f.name === file.name && f.path === file.path)
    if (duplicate) {
      throw new Error(`File '${file.name}' has already been attached to this thread`)
    }

    const text = await ragApi.parseDocument(file.path, file.type || 'application/octet-stream')
    const chunks = await this.chunkText(text, opts.chunkSize, opts.chunkOverlap)
    if (!chunks.length) {
      // Nothing to embed: only register the file.
      return await vecdb.createFile(collection, file)
    }

    const embeddings = await this.embedTexts(chunks)
    const dimension = embeddings[0]?.length || 0
    if (dimension <= 0) throw new Error('Embedding dimension not available')

    // Validate every chunk/embedding pair BEFORE touching the database, so a
    // short or inconsistent engine response cannot leave a half-written file.
    const payload = chunks.map((chunk, i) => {
      const embedding = embeddings[i]
      if (!Array.isArray(embedding) || embedding.length !== dimension) {
        throw new Error(`Embedding for chunk ${i} of '${file.name}' is missing or does not have dimension ${dimension}`)
      }
      return { text: chunk, embedding }
    })

    await this.createCollection(threadId, dimension)
    const fi = await vecdb.createFile(collection, file)
    try {
      await vecdb.insertChunks(collection, fi.id, payload)
    } catch (err) {
      // Roll back the file record; otherwise a retry would be rejected as a
      // duplicate. The original failure is what the caller needs to see, so
      // a cleanup failure is only logged.
      await vecdb.deleteFile(collection, fi.id).catch((cleanupErr) => {
        console.warn(`Failed to remove file record '${fi.id}' after a failed ingest`, cleanupErr)
      })
      throw err
    }

    const infos = await vecdb.listAttachments(collection)
    const updated = infos.find((e) => e.id === fi.id)
    return updated ?? { ...fi, chunk_count: chunks.length }
  }

  /**
   * Lists the files attached to a thread.
   *
   * @param threadId - Thread whose attachments are listed.
   * @param limit - Optional limit, forwarded to the plugin.
   */
  async listAttachments(threadId: string, limit?: number): Promise<AttachmentFileInfo[]> {
    return await vecdb.listAttachments(this.collectionForThread(threadId), limit) as AttachmentFileInfo[]
  }

  /**
   * Fetches chunks of one file from the thread's collection.
   *
   * @param threadId - Thread whose collection is read.
   * @param fileId - File the chunks belong to.
   * @param startOrder - Forwarded to the plugin as the start of the range.
   * @param endOrder - Forwarded to the plugin as the end of the range.
   */
  async getChunks(
    threadId: string,
    fileId: string,
    startOrder: number,
    endOrder: number
  ): Promise<VectorSearchResult[]> {
    const collection = this.collectionForThread(threadId)
    return await vecdb.getChunks(collection, fileId, startOrder, endOrder) as VectorSearchResult[]
  }

  /** Deletes a file from the thread's collection. */
  async deleteFile(threadId: string, fileId: string): Promise<void> {
    return await vecdb.deleteFile(this.collectionForThread(threadId), fileId)
  }
}