- Complete plugin architecture with modular design
- Qdrant client with HTTP integration using requestUrl
- Ollama and OpenAI embedding providers with batching
- Hybrid chunking (semantic + size-based fallback)
- Content extractors for markdown, code, PDFs, and images
- Real-time indexing with file watcher and queue
- Search modal with keyboard navigation
- Comprehensive settings UI with connection testing
- Graph visualization framework (basic implementation)
- Full TypeScript types and error handling
- Desktop-only plugin with status bar integration
- Complete documentation and setup guide

Features implemented:
✅ Semantic search with vector embeddings
✅ Multiple embedding providers (Ollama/OpenAI)
✅ Rich content extraction (markdown, code, PDFs, images)
✅ Smart chunking with heading-based splits
✅ Real-time file indexing with progress tracking
✅ Standalone search interface
✅ Comprehensive settings and configuration
✅ Graph view foundation for document relationships
✅ Full error handling and logging
✅ Complete documentation and troubleshooting guide

Ready for testing once a Qdrant instance and an embedding provider are set up.
45 lines
1.3 KiB
TypeScript
45 lines
1.3 KiB
TypeScript
import { TFile } from 'obsidian';
|
|
import { BaseExtractor } from './base';
|
|
import { ExtractedContent } from '../types';
|
|
|
|
/**
 * Extractor for markdown notes.
 *
 * Relies on helpers inherited from `BaseExtractor` (`getFileContent`,
 * `createBaseMetadata`, `extractFrontmatter`, `extractMarkdownElements`);
 * their implementations are not defined in this class.
 */
export class MarkdownExtractor extends BaseExtractor {
  /**
   * Reports whether this extractor applies to the given file.
   *
   * @param file - The vault file to test.
   * @returns `true` only when the file's extension is exactly `md`.
   */
  canHandle(file: TFile): boolean {
    const isMarkdown = file.extension === 'md';
    return isMarkdown;
  }

  /**
   * Reads a markdown file and produces its body text plus enriched metadata.
   *
   * The frontmatter is separated from the body, and the body is scanned for
   * headings, tags and links. The returned `text` is the body only (the
   * portion `extractFrontmatter` reports as `body`).
   *
   * @param file - The markdown file to extract.
   * @returns The body text, the populated metadata object, and an explicit
   *   `pageNumbers: undefined` because markdown has no page concept.
   */
  async extract(file: TFile): Promise<ExtractedContent> {
    const rawText = await this.getFileContent(file);
    const metadata = this.createBaseMetadata(file);

    // Split the raw text into its frontmatter object and the remaining body.
    const parsed = this.extractFrontmatter(rawText);
    const frontmatter = parsed.frontmatter;
    const body = parsed.body;

    // Only the body is scanned for markdown elements.
    const elements = this.extractMarkdownElements(body);

    // Empty heading strings are dropped; tags and links are de-duplicated
    // while keeping first-seen order.
    metadata.h1 = elements.headings.filter((heading) => heading.length > 0);
    metadata.tags = Array.from(new Set(elements.tags));
    metadata.links = Array.from(new Set(elements.links));
    metadata.fm = frontmatter;

    // A truthy `aliases` value is always stored as an array: arrays are kept
    // as-is, any other value is wrapped in a one-element array.
    const aliasValue = frontmatter.aliases;
    if (aliasValue) {
      if (Array.isArray(aliasValue)) {
        metadata.aliases = aliasValue;
      } else {
        metadata.aliases = [aliasValue];
      }
    }

    // A truthy frontmatter title replaces whatever title the base metadata had.
    const titleValue = frontmatter.title;
    if (titleValue) {
      metadata.title = titleValue;
    }

    return {
      text: body,
      metadata,
      pageNumbers: undefined, // Markdown files don't have page numbers
    };
  }
}
|