- Complete plugin architecture with modular design
- Qdrant client with HTTP integration using requestUrl
- Ollama and OpenAI embedding providers with batching
- Hybrid chunking (semantic + size-based fallback)
- Content extractors for markdown, code, PDFs, and images
- Real-time indexing with file watcher and queue
- Search modal with keyboard navigation
- Comprehensive settings UI with connection testing
- Graph visualization framework (basic implementation)
- Full TypeScript types and error handling
- Desktop-only plugin with status bar integration
- Complete documentation and setup guide

Features implemented:
✅ Semantic search with vector embeddings
✅ Multiple embedding providers (Ollama/OpenAI)
✅ Rich content extraction (markdown, code, PDFs, images)
✅ Smart chunking with heading-based splits
✅ Real-time file indexing with progress tracking
✅ Standalone search interface
✅ Comprehensive settings and configuration
✅ Graph view foundation for document relationships
✅ Full error handling and logging
✅ Complete documentation and troubleshooting guide

Ready for testing with Qdrant instance and embedding provider setup.
308 lines
8.0 KiB
TypeScript
308 lines
8.0 KiB
TypeScript
import { App, TFile } from 'obsidian';
|
|
import { PluginSettings, IndexingProgress } from '../types';
|
|
import { ExtractorManager } from '../extractors';
|
|
import { HybridChunker } from '../chunking/chunker';
|
|
import { createEmbeddingProvider } from '../embeddings';
|
|
import { QdrantClient } from '../qdrant/client';
|
|
import { CollectionManager } from '../qdrant/collection';
|
|
import { IndexingQueue } from './indexQueue';
|
|
import { FileWatcher } from './fileWatcher';
|
|
import { FileManifest } from './manifest';
|
|
|
|
/**
 * Wires together the extractor manager, chunker, embedding provider, Qdrant
 * client/collection manager, indexing queue, file watcher and file manifest,
 * and exposes the high-level indexing operations built on top of them.
 *
 * Call {@link initialize} before any indexing operation; methods that need an
 * initialized system throw `Error('Indexing orchestrator not initialized')`
 * otherwise.
 */
export class IndexingOrchestrator {
  private readonly app: App;
  private readonly settings: PluginSettings;
  private readonly extractorManager: ExtractorManager;
  private readonly chunker: HybridChunker;
  // Kept as `any`: the concrete provider type returned by
  // createEmbeddingProvider is not visible from this file.
  // eslint-disable-next-line @typescript-eslint/no-explicit-any
  private readonly embeddingProvider: any;
  private readonly qdrantClient: QdrantClient;
  private readonly collectionManager: CollectionManager;
  private readonly indexingQueue: IndexingQueue;
  private readonly fileWatcher: FileWatcher;
  private readonly fileManifest: FileManifest;
  private isInitialized = false;

  /**
   * Constructs every collaborator from the given app and settings. No I/O is
   * awaited here; the asynchronous setup happens in {@link initialize}.
   *
   * @param app - Obsidian application handle.
   * @param settings - Plugin settings (chunking, qdrant, indexing, ...).
   */
  constructor(app: App, settings: PluginSettings) {
    this.app = app;
    this.settings = settings;

    // Initialize components
    this.extractorManager = new ExtractorManager(app);
    this.chunker = new HybridChunker(settings.chunking);
    this.embeddingProvider = createEmbeddingProvider(settings);
    this.qdrantClient = new QdrantClient(settings.qdrant);
    this.collectionManager = new CollectionManager(
      this.qdrantClient,
      settings,
      this.getVaultName()
    );
    this.indexingQueue = new IndexingQueue(
      this.extractorManager,
      this.chunker,
      this.embeddingProvider,
      this.collectionManager
    );
    this.fileWatcher = new FileWatcher(app.vault, this.indexingQueue);
    this.fileManifest = new FileManifest(app);
  }

  /**
   * Initialize the indexing system: loads the manifest, asks the embedding
   * provider for its dimension, initializes the collection with it and starts
   * the file watcher. A no-op when already initialized.
   *
   * @throws Rethrows any error raised by a step, after logging it.
   */
  async initialize(): Promise<void> {
    if (this.isInitialized) {
      return;
    }

    try {
      // Load file manifest
      await this.fileManifest.load();

      // Initialize embedding provider and get dimension
      const embeddingDimension = await this.embeddingProvider.getDimension();

      // Initialize Qdrant collection
      await this.collectionManager.initialize(embeddingDimension);

      // Start file watching last, so nothing is watched if setup failed
      this.fileWatcher.startWatching();

      this.isInitialized = true;
      console.log('Indexing orchestrator initialized successfully');
    } catch (error) {
      console.error('Failed to initialize indexing orchestrator:', error);
      throw error;
    }
  }

  /**
   * Shutdown the indexing system: stops the watcher and the queue, then saves
   * the manifest. A no-op when not initialized.
   *
   * Best-effort: failures are logged, not rethrown. If a step fails the
   * orchestrator stays marked as initialized.
   */
  async shutdown(): Promise<void> {
    if (!this.isInitialized) {
      return;
    }

    try {
      // Stop file watching
      this.fileWatcher.stopWatching();

      // Stop indexing queue
      this.indexingQueue.stopProcessing();

      // Save file manifest
      await this.fileManifest.save();

      this.isInitialized = false;
      console.log('Indexing orchestrator shutdown successfully');
    } catch (error) {
      console.error('Failed to shutdown indexing orchestrator:', error);
    }
  }

  /**
   * Perform full vault indexing: queues an 'update' for every indexable file
   * the manifest reports as needing re-indexing, queues a 'delete' for every
   * orphaned manifest path that still resolves to a file, then processes the
   * queue.
   *
   * @throws Error if not initialized; rethrows (after logging) any failure.
   */
  async indexFullVault(): Promise<void> {
    this.assertInitialized();

    try {
      // Get all files that can be indexed
      const allFiles = this.getIndexableFiles();

      // Get files that need re-indexing
      const filesToIndex = this.fileManifest.getFilesNeedingReindexing(allFiles);

      // Get orphaned files (manifest entries not among the indexable files)
      const orphanedFiles = this.fileManifest.getOrphanedFiles(allFiles);

      // Add files to indexing queue
      this.indexingQueue.addFiles(filesToIndex, 'update');

      // Add orphaned files for deletion. The queue takes a TFile, so only
      // paths that still resolve to a file in the vault can be queued.
      const unresolved: string[] = [];
      for (const orphanedPath of orphanedFiles) {
        const orphanedFile = this.app.vault.getAbstractFileByPath(orphanedPath);
        if (orphanedFile instanceof TFile) {
          this.indexingQueue.addFile(orphanedFile, 'delete');
        } else {
          unresolved.push(orphanedPath);
        }
      }

      // NOTE(review): paths that no longer exist in the vault cannot be turned
      // into a TFile, so no delete is queued for them. Surface this instead of
      // dropping it silently; removing them needs a path-based delete API.
      if (unresolved.length > 0) {
        console.warn(
          `Skipped ${unresolved.length} orphaned manifest entries that no longer resolve to a file:`,
          unresolved
        );
      }

      // Start processing
      await this.indexingQueue.startProcessing();
    } catch (error) {
      console.error('Failed to index full vault:', error);
      throw error;
    }
  }

  /**
   * Index a specific file: queues an 'update' for it and processes the queue.
   *
   * @param file - File to (re-)index.
   * @throws Error if not initialized.
   */
  async indexFile(file: TFile): Promise<void> {
    this.assertInitialized();

    this.indexingQueue.addFile(file, 'update');
    await this.indexingQueue.startProcessing();
  }

  /**
   * Delete a file from the index: queues a 'delete' for it and processes the
   * queue.
   *
   * @param file - File to remove from the index.
   * @throws Error if not initialized.
   */
  async deleteFile(file: TFile): Promise<void> {
    this.assertInitialized();

    this.indexingQueue.addFile(file, 'delete');
    await this.indexingQueue.startProcessing();
  }

  /**
   * Get indexing progress as reported by the indexing queue.
   */
  getProgress(): IndexingProgress {
    return this.indexingQueue.getProgress();
  }

  /**
   * Set progress callback; forwarded to the indexing queue.
   *
   * @param callback - Receives progress snapshots.
   */
  setProgressCallback(callback: (progress: IndexingProgress) => void): void {
    this.indexingQueue.setProgressCallback(callback);
  }

  /**
   * Set error callback; forwarded to the indexing queue.
   *
   * @param callback - Receives error messages.
   */
  setErrorCallback(callback: (error: string) => void): void {
    this.indexingQueue.setErrorCallback(callback);
  }

  /**
   * Get index statistics gathered from the collection manager, the file
   * manifest and the indexing queue.
   */
  async getIndexStats(): Promise<{
    collectionStats: any;
    manifestStats: any;
    queueStats: any;
  }> {
    const collectionStats = await this.collectionManager.getStats();
    const manifestStats = this.fileManifest.getStats();
    const queueStats = this.indexingQueue.getQueueStats();

    return {
      collectionStats,
      manifestStats,
      queueStats
    };
  }

  /**
   * Clear the entire index: clears the collection, clears and saves the
   * manifest, then empties the queue.
   *
   * @throws Error if not initialized; rethrows (after logging) any failure.
   */
  async clearIndex(): Promise<void> {
    this.assertInitialized();

    try {
      // Clear Qdrant collection
      await this.collectionManager.clearCollection();

      // Clear file manifest
      this.fileManifest.clear();
      await this.fileManifest.save();

      // Clear indexing queue
      this.indexingQueue.clearQueue();

      console.log('Index cleared successfully');
    } catch (error) {
      console.error('Failed to clear index:', error);
      throw error;
    }
  }

  /**
   * Get files that can be indexed. A file is kept when, in this order, it:
   *  1. is no larger than `settings.indexing.maxFileSize`;
   *  2. has no path segment (file name included) listed in `ignoredFolders`;
   *  3. matches at least one include pattern (an empty include list therefore
   *     selects nothing);
   *  4. matches no exclude pattern;
   *  5. is accepted by the extractor manager.
   *
   * Patterns are compiled once per call rather than once per file.
   */
  private getIndexableFiles(): TFile[] {
    const { maxFileSize, ignoredFolders, includePatterns, excludePatterns } =
      this.settings.indexing;

    const includeMatchers = includePatterns.map((pattern: string) =>
      IndexingOrchestrator.compileGlob(pattern)
    );
    const excludeMatchers = excludePatterns.map((pattern: string) =>
      IndexingOrchestrator.compileGlob(pattern)
    );

    return this.app.vault.getFiles().filter(file => {
      // Check file size
      if (file.stat.size > maxFileSize) {
        return false;
      }

      // Check ignored folders
      if (file.path.split('/').some(part => ignoredFolders.includes(part))) {
        return false;
      }

      // Check include patterns
      if (!includeMatchers.some((matcher: RegExp) => matcher.test(file.path))) {
        return false;
      }

      // Check exclude patterns
      if (excludeMatchers.some((matcher: RegExp) => matcher.test(file.path))) {
        return false;
      }

      // Check if extractor can handle the file
      return this.extractorManager.canHandle(file);
    });
  }

  /**
   * Compile a simple glob into a RegExp.
   *
   * - `**` followed by `/` matches zero or more leading directories, so
   *   root-level files match too;
   * - any other `*` matches any run of characters (including `/`);
   * - every other character is matched literally, so regex metacharacters in
   *   a pattern can neither change its meaning nor make compilation throw.
   *
   * The result is deliberately unanchored: a pattern matches when it occurs
   * anywhere in the path, as it did before escaping was added.
   *
   * @param pattern - Glob pattern from the settings.
   * @returns A stateless (non-global) regular expression.
   */
  private static compileGlob(pattern: string): RegExp {
    const source = pattern
      .split('**/')
      .map(segment =>
        segment
          .split('*')
          .map(literal => literal.replace(/[.+?^${}()|[\]\\]/g, '\\$&'))
          .join('.*')
      )
      .join('(?:.*/)?');

    return new RegExp(source);
  }

  /**
   * Guard shared by every operation that requires {@link initialize} to have
   * completed.
   *
   * @throws Error when the orchestrator is not initialized.
   */
  private assertInitialized(): void {
    if (!this.isInitialized) {
      throw new Error('Indexing orchestrator not initialized');
    }
  }

  /**
   * Get vault name
   */
  private getVaultName(): string {
    return this.app.vault.getName();
  }

  /**
   * Check if the system is initialized
   */
  isReady(): boolean {
    return this.isInitialized;
  }

  /**
   * Get extractor status as reported by the extractor manager.
   */
  getExtractorStatus(): any[] {
    return this.extractorManager.getExtractorStatus();
  }

  /**
   * Test connections to Qdrant and to the embedding provider. The two probes
   * are independent, so they run concurrently.
   *
   * @returns The result of each probe.
   */
  async testConnections(): Promise<{
    qdrant: boolean;
    embedding: boolean;
  }> {
    const [qdrant, embedding] = await Promise.all([
      this.qdrantClient.testConnection(),
      this.embeddingProvider.testConnection()
    ]);

    return { qdrant, embedding };
  }
}
|