import { TFile } from 'obsidian';
import { BaseExtractor } from './base';
import { ExtractedContent } from '../types';

/**
 * Extractor for Markdown notes (`.md` files).
 *
 * File reading, base-metadata creation, frontmatter splitting and Markdown
 * element scanning are delegated to helpers reached through `this`
 * (presumably inherited from `BaseExtractor` — confirm in `./base`).
 */
export class MarkdownExtractor extends BaseExtractor {
  /**
   * Reports whether this extractor is responsible for the given file.
   *
   * @param file - Vault file to test.
   * @returns `true` only when the file's extension is exactly `md`.
   */
  canHandle(file: TFile): boolean {
    return file.extension === 'md';
  }

  /**
   * Reads a Markdown file and returns its body text plus enriched metadata.
   *
   * @param file - Markdown file to extract.
   * @returns The body produced by `extractFrontmatter` as `text`, the
   *   populated metadata object, and `pageNumbers` left undefined.
   */
  async extract(file: TFile): Promise<ExtractedContent> {
    const content = await this.getFileContent(file);
    const metadata = this.createBaseMetadata(file);

    // Extract frontmatter and body
    const { frontmatter, body } = this.extractFrontmatter(content);

    // Extract markdown elements from the body only
    const { headings, tags, links } = this.extractMarkdownElements(body);

    // Update metadata with extracted information
    metadata.h1 = headings.filter(h => h.length > 0); // drop empty headings
    metadata.tags = [...new Set(tags)]; // Remove duplicates
    metadata.links = [...new Set(links)]; // Remove duplicates
    metadata.fm = frontmatter;

    // Extract aliases from frontmatter; a single value is wrapped so that
    // metadata.aliases is always an array when set
    if (frontmatter.aliases) {
      metadata.aliases = Array.isArray(frontmatter.aliases)
        ? frontmatter.aliases
        : [frontmatter.aliases];
    }

    // Extract title from frontmatter if present
    if (frontmatter.title) {
      metadata.title = frontmatter.title;
    }

    return {
      text: body,
      metadata,
      pageNumbers: undefined // Markdown files don't have page numbers
    };
  }
}