This commit resolves several critical issues that prevented the plugin from working correctly with Qdrant and adds essential metadata to indexed chunks.

**Settings & Configuration:**
- Fix settings initialization using deep merge instead of shallow Object.assign
  - Prevents nested settings from being lost during load
  - Ensures all default values are properly preserved
- Add orchestrator reinitialization when settings are saved
  - Ensures QdrantClient and embedding providers use updated settings
  - Fixes issue where plugin used localhost instead of saved HTTPS URL

**UUID Generation:**
- Fix generateDeterministicUUID() creating invalid UUIDs
  - Was generating 35-character UUIDs instead of proper 36-character format
  - Now correctly creates valid UUID v4 format: xxxxxxxx-xxxx-4xxx-yxxx-xxxxxxxxxxxx
  - Properly generates segment 5 (12 hex chars) from combined hash data
  - Fixes segment 4 to start with 8/9/a/b per UUID spec
  - Resolves Qdrant API rejections: "value X is not a valid point ID"

**Chunk Metadata:**
- Add chunk_text field to ChunkMetadata type
  - Stores the actual text content of each chunk in Qdrant payload
  - Essential for displaying search results and content preview
- Add model name to chunk metadata
  - Populates model field with embedding provider name (e.g., "nomic-embed-text")
  - Enables tracking which model generated each embedding
  - Supports future multi-model collections

**Debug Logging:**
- Add logging for settings loading and URL tracking
- Add logging for QdrantClient initialization
- Add logging for orchestrator creation with settings

**Documentation:**
- Add CLAUDE.md with comprehensive architecture documentation
  - Build commands and development workflow
  - Core components and data processing pipeline
  - Important implementation details and debugging guide

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
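The UUID fix above is what unblocks Qdrant point IDs. As a rough illustration of the 36-character layout the commit describes, a deterministic generator could look like the sketch below. This is a hypothetical example only: the hash functions and seed constants are assumptions for illustration, not the actual implementation in the plugin's `utils/hash` module.

```typescript
// Hypothetical sketch: the real generateDeterministicUUID may hash differently;
// this only demonstrates the xxxxxxxx-xxxx-4xxx-yxxx-xxxxxxxxxxxx layout.

/** FNV-1a style 32-bit hash of `input`, returned as an 8-char hex string. */
function hashHex(input: string, seed: number): string {
  let h = seed >>> 0;
  for (let i = 0; i < input.length; i++) {
    h ^= input.charCodeAt(i);
    h = Math.imul(h, 0x01000193) >>> 0;
  }
  return h.toString(16).padStart(8, '0');
}

/** Deterministic 36-character UUID in v4-like format. */
function generateDeterministicUUID(input: string): string {
  // Derive 32 hex chars of material from the input using independent seeds.
  const hex =
    hashHex(input, 0x811c9dc5) +
    hashHex(input, 0x01000193) +
    hashHex(input, 0x9e3779b9) +
    hashHex(input, 0x7f4a7c15);

  const seg1 = hex.slice(0, 8);
  const seg2 = hex.slice(8, 12);
  const seg3 = '4' + hex.slice(12, 15);                                // version nibble fixed to 4
  const seg4 = '89ab'.charAt(parseInt(hex.charAt(15), 16) % 4) + hex.slice(16, 19); // variant nibble 8/9/a/b
  const seg5 = hex.slice(19, 31);                                      // full 12 hex chars

  return `${seg1}-${seg2}-${seg3}-${seg4}-${seg5}`;
}

// Same input always yields the same ID, e.g. for "<file path>:<chunk index>":
// generateDeterministicUUID('notes/example.md:0');
```

Because the ID is a pure function of file path and chunk index, re-indexing a file overwrites its existing points in Qdrant instead of creating duplicates.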
284 lines
7.7 KiB
TypeScript
import { TFile } from 'obsidian';
import { IndexingQueueItem, IndexingProgress, EmbeddingProviderInterface } from '../types';
import { ExtractorManager } from '../extractors';
import { HybridChunker } from '../chunking/chunker';
import { CollectionManager as QdrantCollectionManager } from '../qdrant/collection';
import { generateDeterministicUUID } from '../utils/hash';

export class IndexingQueue {
  private queue: IndexingQueueItem[] = [];
  private isProcessing = false;
  private progress: IndexingProgress;
  private extractorManager: ExtractorManager;
  private chunker: HybridChunker;
  private embeddingProvider: EmbeddingProviderInterface;
  private collectionManager: QdrantCollectionManager;
  private onProgressUpdate?: (progress: IndexingProgress) => void;
  private onError?: (error: string) => void;
  private maxConcurrency = 3;
  private batchSize = 10;

  constructor(
    extractorManager: ExtractorManager,
    chunker: HybridChunker,
    embeddingProvider: EmbeddingProviderInterface,
    collectionManager: QdrantCollectionManager
  ) {
    this.extractorManager = extractorManager;
    this.chunker = chunker;
    this.embeddingProvider = embeddingProvider;
    this.collectionManager = collectionManager;

    this.progress = {
      totalFiles: 0,
      processedFiles: 0,
      totalChunks: 0,
      processedChunks: 0,
      errors: [],
      isRunning: false
    };
  }

  /**
   * Add files to the indexing queue
   */
  addFiles(files: TFile[], action: 'create' | 'update' | 'delete' = 'update'): void {
    for (const file of files) {
      this.addFile(file, action);
    }
  }

  /**
   * Add a single file to the indexing queue
   */
  addFile(file: TFile, action: 'create' | 'update' | 'delete' = 'update'): void {
    const priority = action === 'delete' ? 0 : (action === 'create' ? 2 : 1);

    const item: IndexingQueueItem = {
      file,
      action,
      priority
    };

    // Remove existing entry for this file
    this.queue = this.queue.filter(existing => existing.file.path !== file.path);

    // Add new entry
    this.queue.push(item);

    // Sort by priority (higher priority first)
    this.queue.sort((a, b) => b.priority - a.priority);
  }

  /**
   * Start processing the queue
   */
  async startProcessing(): Promise<void> {
    if (this.isProcessing) {
      return;
    }

    this.isProcessing = true;
    this.progress.isRunning = true;
    this.progress.totalFiles = this.queue.length;
    this.progress.processedFiles = 0;
    this.progress.errors = [];

    this.updateProgress();

    try {
      await this.processQueue();
    } finally {
      this.isProcessing = false;
      this.progress.isRunning = false;
      this.updateProgress();
    }
  }

  /**
   * Stop processing the queue
   */
  stopProcessing(): void {
    this.isProcessing = false;
    this.progress.isRunning = false;
    this.updateProgress();
  }

  /**
   * Clear the queue
   */
  clearQueue(): void {
    this.queue = [];
    this.progress.totalFiles = 0;
    this.progress.processedFiles = 0;
    this.updateProgress();
  }

  /**
   * Get current progress
   */
  getProgress(): IndexingProgress {
    return { ...this.progress };
  }

  /**
   * Set progress update callback
   */
  setProgressCallback(callback: (progress: IndexingProgress) => void): void {
    this.onProgressUpdate = callback;
  }

  /**
   * Set error callback
   */
  setErrorCallback(callback: (error: string) => void): void {
    this.onError = callback;
  }

  private async processQueue(): Promise<void> {
    while (this.queue.length > 0 && this.isProcessing) {
      const batch = this.queue.splice(0, this.batchSize);
      await this.processBatch(batch);
    }
  }

  private async processBatch(batch: IndexingQueueItem[]): Promise<void> {
    const promises = batch.map(item => this.processItem(item));
    await Promise.allSettled(promises);
  }

  private async processItem(item: IndexingQueueItem): Promise<void> {
    try {
      this.progress.currentFile = item.file.path;
      this.updateProgress();

      if (item.action === 'delete') {
        await this.deleteFile(item.file);
      } else {
        await this.indexFile(item.file);
      }

      this.progress.processedFiles++;
      this.updateProgress();
    } catch (error) {
      const errorMessage = `Failed to process ${item.file.path}: ${error}`;
      this.progress.errors.push(errorMessage);
      this.onError?.(errorMessage);
      console.error(errorMessage, error);
    }
  }

  private async indexFile(file: TFile): Promise<void> {
    try {
      // Check if file can be handled
      if (!this.extractorManager.canHandle(file)) {
        console.log(`Skipping file ${file.path} - no suitable extractor`);
        return;
      }

      // Extract content
      const extractedContent = await this.extractorManager.extract(file);

      if (!extractedContent.text.trim()) {
        console.log(`Skipping file ${file.path} - no text content`);
        return;
      }

      // Chunk content
      const chunks = await this.chunker.chunk(extractedContent);

      if (chunks.length === 0) {
        console.log(`Skipping file ${file.path} - no chunks created`);
        return;
      }

      this.progress.totalChunks += chunks.length;

      // Extract chunk texts and generate embeddings
      const texts = chunks.map(chunk => extractedContent.text.substring(chunk.chunk_start, chunk.chunk_end));
      const embeddings = await this.embeddingProvider.embed(texts);

      // Get model name from the embedding provider
      const modelName = this.embeddingProvider.getName();

      // Prepare points for Qdrant
      const points = chunks.map((chunk, index) => ({
        id: this.generatePointId(file, chunk.chunk_index),
        vector: embeddings[index],
        metadata: {
          ...chunk,
          model: modelName, // Set the model name for each chunk
          chunk_text: texts[index] // Include the actual text content
        }
      }));

      // Index in Qdrant
      await this.collectionManager.indexChunks(points);

      this.progress.processedChunks += chunks.length;
      this.updateProgress();

    } catch (error) {
      throw new Error(`Failed to index file ${file.path}: ${error}`);
    }
  }

  private async deleteFile(file: TFile): Promise<void> {
    try {
      // Delete all chunks for this file from Qdrant
      await this.collectionManager.deleteFileChunks(file.path);
    } catch (error) {
      throw new Error(`Failed to delete file ${file.path}: ${error}`);
    }
  }

  private generatePointId(file: TFile, chunkIndex: number): string {
    // Generate a deterministic UUID based on file path and chunk index
    // This ensures the same file+chunk always gets the same ID
    const idString = `${file.path}:${chunkIndex}`;
    return generateDeterministicUUID(idString);
  }

  private updateProgress(): void {
    this.onProgressUpdate?.(this.getProgress());
  }

  /**
   * Get queue statistics
   */
  getQueueStats(): {
    queueLength: number;
    isProcessing: boolean;
    estimatedTimeRemaining: number;
  } {
    const averageTimePerFile = 2000; // 2 seconds per file (rough estimate)
    const estimatedTimeRemaining = this.queue.length * averageTimePerFile;

    return {
      queueLength: this.queue.length,
      isProcessing: this.isProcessing,
      estimatedTimeRemaining
    };
  }

  /**
   * Get files in queue by action type
   */
  getFilesByAction(action: 'create' | 'update' | 'delete'): TFile[] {
    return this.queue
      .filter(item => item.action === action)
      .map(item => item.file);
  }

  /**
   * Remove files from queue
   */
  removeFiles(filePaths: string[]): void {
    this.queue = this.queue.filter(item => !filePaths.includes(item.file.path));
    this.progress.totalFiles = this.queue.length;
    this.updateProgress();
  }
}