import { RecursiveCharacterTextSplitter } from "langchain/text_splitter";
import { formatDocumentsAsString } from "langchain/util/document";
import { PDFLoader } from "langchain/document_loaders/fs/pdf";
import { HNSWLib } from "langchain/vectorstores/hnswlib";
import { OpenAIEmbeddings } from "langchain/embeddings/openai";
import { readEmbeddingEngine } from "../../engine";

/**
 * Helper that ingests a PDF into an HNSWLib vector store saved on disk and
 * later queries that store for documents relevant to a text query.
 *
 * Typical flow: `ingestAgentKnowledge` (build + save the store), then
 * `loadRetrievalAgent` (load the store and create a retriever), then
 * `generateResult` (query the retriever).
 */
export class Retrieval {
  /** Chunk size handed to the text splitter; overwritten by the constructor. */
  public chunkSize: number = 100;
  /** Chunk overlap handed to the text splitter; overwritten by the constructor. */
  public chunkOverlap?: number = 0;
  /** Retriever created by `loadRetrievalAgent`; unset until that call succeeds. */
  private retriever: any;
  /** Embedding model used both when building and when loading the vector store. */
  private embeddingModel: any = undefined;
  /** Splitter built by `updateTextSplitter`. */
  private textSplitter?: RecursiveCharacterTextSplitter;

  /**
   * @param chunkSize - Chunk size for the text splitter (default 4000).
   * @param chunkOverlap - Chunk overlap for the text splitter (default 200).
   */
  constructor(chunkSize: number = 4000, chunkOverlap: number = 200) {
    this.updateTextSplitter(chunkSize, chunkOverlap);
    // Start with a default-configured OpenAI embedding model; callers may
    // replace it later through updateEmbeddingEngine.
    this.embeddingModel = new OpenAIEmbeddings({});
  }

  /**
   * Records the new chunking parameters and rebuilds the text splitter with them.
   *
   * @param chunkSize - New chunk size.
   * @param chunkOverlap - New chunk overlap.
   */
  public updateTextSplitter(chunkSize: number, chunkOverlap: number): void {
    this.chunkSize = chunkSize;
    this.chunkOverlap = chunkOverlap;
    this.textSplitter = new RecursiveCharacterTextSplitter({
      chunkSize,
      chunkOverlap,
    });
  }

  /**
   * Replaces the embedding model according to the given engine name.
   *
   * @param engine - Engine identifier. `"nitro"` selects the local endpoint;
   *   any other value falls back to OpenAI using the API key from the
   *   engine's settings.
   */
  public updateEmbeddingEngine(engine: string): void {
    // Engine settings are not compatible with the current embedding model params
    // Switch case manually for now
    const settings = readEmbeddingEngine(engine);
    if (engine === "nitro") {
      this.embeddingModel = new OpenAIEmbeddings(
        { openAIApiKey: "nitro-embedding" },
        { basePath: "http://127.0.0.1:3928/v1" },
      );
    } else {
      // Fallback to OpenAI Settings
      this.embeddingModel = new OpenAIEmbeddings({
        configuration: {
          apiKey: settings.api_key,
        },
      });
    }
  }

  /**
   * Loads a PDF, splits it into chunks, embeds the chunks into an HNSWLib
   * vector store and saves that store.
   *
   * @param filePath - Path of the PDF to load.
   * @param memoryPath - Location passed to the vector store's `save`.
   * @returns A promise that settles once the store has been saved.
   * @throws Error if the text splitter has not been initialized.
   */
  public ingestAgentKnowledge = async (
    filePath: string,
    memoryPath: string,
  ): Promise<void> => {
    const splitter = this.textSplitter;
    if (!splitter) {
      // Unreachable through the constructor, which always builds a splitter;
      // guards against misuse instead of relying on a non-null assertion.
      throw new Error("Text splitter has not been initialized");
    }

    const loader = new PDFLoader(filePath, {
      splitPages: true,
    });
    const doc = await loader.load();
    const docs = await splitter.splitDocuments(doc);
    const vectorStore = await HNSWLib.fromDocuments(docs, this.embeddingModel);
    return vectorStore.save(memoryPath);
  };

  /**
   * Loads a previously saved HNSWLib vector store with the current embedding
   * model and creates the retriever used by `generateResult`.
   *
   * @param memoryPath - Location the vector store was saved to.
   */
  public loadRetrievalAgent = async (memoryPath: string): Promise<void> => {
    const vectorStore = await HNSWLib.load(memoryPath, this.embeddingModel);
    // NOTE(review): 2 is presumably the number of documents returned per
    // query (LangChain's `k`) - confirm against the asRetriever documentation.
    this.retriever = vectorStore.asRetriever(2);
  };

  /**
   * Queries the retriever and returns the matching documents formatted as a
   * single string.
   *
   * @param query - Text to search for.
   * @returns The formatted documents, or a single space (`" "`) when no
   *   retriever has been loaded yet.
   */
  public generateResult = async (query: string): Promise<string> => {
    if (!this.retriever) {
      return " ";
    }
    const relevantDocs = await this.retriever.getRelevantDocuments(query);
    return formatDocumentsAsString(relevantDocs);
  };
}