feat: normalize LaTeX fragments in markdown rendering (#6488)
* feat: normalize LaTeX fragments in markdown rendering
Added a preprocessing step that converts the LaTeX delimiters `\[...\]` to `$$...$$` and `\(...\)` to `$...$` before rendering. The function skips code blocks, inline code, and HTML tags to avoid unintended transformations. This improves the authoring experience by supporting common LaTeX syntax without requiring explicit `$` delimiters.
* fix: correct inline LaTeX normalization replacement
The replacement function for inline math (`\(...\)`) incorrectly accepted a fourth
parameter (`post`) and appended it to the result, which could introduce stray
characters or `undefined` into the rendered output. Updated the function to
use only the captured prefix and inner content and removed the extraneous
`${post}` interpolation, ensuring clean LaTeX conversion.
* feat: optimize markdown rendering with LaTeX caching and memoized code blocks
- Added a cache to `normalizeLatex` to avoid reprocessing repeated content
- Introduced CodeComponent with stable IDs and memoization to reduce re-renders
- Replaced per-render code block ID mapping with hash-based IDs
- Memoized copy handler and normalized markdown content
- Simplified plugin/component setup with stable references
- Added custom comparison for RenderMarkdown memoization to prevent unnecessary updates
* refactor: memoize content only
---------
Co-authored-by: Louis <louis@jan.ai>
This commit is contained in:
parent
c46e13b8b1
commit
38ad8deae2
@ -7,7 +7,7 @@ import remarkBreaks from 'remark-breaks'
|
||||
import rehypeKatex from 'rehype-katex'
|
||||
import { Prism as SyntaxHighlighter } from 'react-syntax-highlighter'
|
||||
import * as prismStyles from 'react-syntax-highlighter/dist/cjs/styles/prism'
|
||||
import { memo, useState, useMemo, useRef, useEffect } from 'react'
|
||||
import { memo, useState, useMemo, useCallback } from 'react'
|
||||
import { getReadableLanguageName } from '@/lib/utils'
|
||||
import { cn } from '@/lib/utils'
|
||||
import { useCodeblock } from '@/hooks/useCodeblock'
|
||||
@ -25,61 +25,104 @@ interface MarkdownProps {
|
||||
isWrapping?: boolean
|
||||
}
|
||||
|
||||
function RenderMarkdownComponent({
|
||||
content,
|
||||
enableRawHtml,
|
||||
className,
|
||||
isUser,
|
||||
components,
|
||||
isWrapping,
|
||||
}: MarkdownProps) {
|
||||
const { t } = useTranslation()
|
||||
const { codeBlockStyle, showLineNumbers } = useCodeblock()
|
||||
// Cache for normalized LaTeX content
|
||||
const latexCache = new Map<string, string>()
|
||||
|
||||
// State for tracking which code block has been copied
|
||||
const [copiedId, setCopiedId] = useState<string | null>(null)
|
||||
// Map to store unique IDs for code blocks based on content and position
|
||||
const codeBlockIds = useRef(new Map<string, string>())
|
||||
|
||||
// Clear ID map when content changes
|
||||
useEffect(() => {
|
||||
codeBlockIds.current.clear()
|
||||
}, [content])
|
||||
|
||||
// Function to handle copying code to clipboard
|
||||
const handleCopy = (code: string, id: string) => {
|
||||
navigator.clipboard.writeText(code)
|
||||
setCopiedId(id)
|
||||
|
||||
// Reset copied state after 2 seconds
|
||||
setTimeout(() => {
|
||||
setCopiedId(null)
|
||||
}, 2000)
|
||||
/**
|
||||
* Optimized preprocessor: normalize LaTeX fragments into $ / $$.
|
||||
* Uses caching to avoid reprocessing the same content.
|
||||
*/
|
||||
const normalizeLatex = (input: string): string => {
|
||||
// Check cache first
|
||||
if (latexCache.has(input)) {
|
||||
return latexCache.get(input)!
|
||||
}
|
||||
|
||||
// Default components for syntax highlighting and emoji rendering
|
||||
const defaultComponents: Components = useMemo(
|
||||
() => ({
|
||||
code: ({ className, children, ...props }) => {
|
||||
const segments = input.split(/(```[\s\S]*?```|`[^`]*`|<[^>]+>)/g)
|
||||
|
||||
const result = segments
|
||||
.map((segment) => {
|
||||
if (!segment) return ''
|
||||
|
||||
// Skip code blocks, inline code, html tags
|
||||
if (/^```[\s\S]*```$/.test(segment)) return segment
|
||||
if (/^`[^`]*`$/.test(segment)) return segment
|
||||
if (/^<[^>]+>$/.test(segment)) return segment
|
||||
|
||||
let s = segment
|
||||
|
||||
// --- Display math: \[...\] surrounded by newlines
|
||||
s = s.replace(
|
||||
/(^|\n)\\\[\s*\n([\s\S]*?)\n\s*\\\](?=\n|$)/g,
|
||||
(_, pre, inner) => `${pre}$$\n${inner.trim()}\n$$`
|
||||
)
|
||||
|
||||
// --- Inline math: \( ... \) when not adjacent to `$` or `\`
|
||||
s = s.replace(
|
||||
/(^|[^$\\])\\\((.+?)\\\)(?=[^$\\]|$)/g,
|
||||
(_, pre, inner) => `${pre}$${inner.trim()}$`
|
||||
)
|
||||
|
||||
return s
|
||||
})
|
||||
.join('')
|
||||
|
||||
// Cache the result (with size limit to prevent memory leaks)
|
||||
if (latexCache.size > 100) {
|
||||
const firstKey = latexCache.keys().next().value || ''
|
||||
latexCache.delete(firstKey)
|
||||
}
|
||||
latexCache.set(input, result)
|
||||
|
||||
return result
|
||||
}
|
||||
|
||||
// Memoized code component to prevent unnecessary re-renders
|
||||
const CodeComponent = memo(
|
||||
({
|
||||
className,
|
||||
children,
|
||||
isUser,
|
||||
codeBlockStyle,
|
||||
showLineNumbers,
|
||||
isWrapping,
|
||||
onCopy,
|
||||
copiedId,
|
||||
...props
|
||||
}: any) => {
|
||||
const { t } = useTranslation()
|
||||
const match = /language-(\w+)/.exec(className || '')
|
||||
const language = match ? match[1] : ''
|
||||
const isInline = !match || !language
|
||||
|
||||
const code = String(children).replace(/\n$/, '')
|
||||
|
||||
// Generate a unique ID based on content and language
|
||||
const contentKey = `${code}-${language}`
|
||||
let codeId = codeBlockIds.current.get(contentKey)
|
||||
if (!codeId) {
|
||||
codeId = `code-${codeBlockIds.current.size}`
|
||||
codeBlockIds.current.set(contentKey, codeId)
|
||||
// Generate a stable ID based on content hash instead of position
|
||||
const codeId = useMemo(() => {
|
||||
let hash = 0
|
||||
for (let i = 0; i < code.length; i++) {
|
||||
const char = code.charCodeAt(i)
|
||||
hash = (hash << 5) - hash + char
|
||||
hash = hash & hash // Convert to 32-bit integer
|
||||
}
|
||||
return `code-${Math.abs(hash)}-${language}`
|
||||
}, [code, language])
|
||||
|
||||
const handleCopyClick = useCallback(
|
||||
(e: React.MouseEvent) => {
|
||||
e.stopPropagation()
|
||||
onCopy(code, codeId)
|
||||
},
|
||||
[code, codeId, onCopy]
|
||||
)
|
||||
|
||||
if (isInline || isUser) {
|
||||
return <code className={cn(className)}>{children}</code>
|
||||
}
|
||||
|
||||
return !isInline && !isUser ? (
|
||||
return (
|
||||
<div className="relative overflow-hidden border rounded-md border-main-view-fg/2">
|
||||
<style>
|
||||
{/* Disable selection of line numbers. React Syntax Highlighter currently has
|
||||
unfixed bug so we can't use the lineNumberContainerStyleProp */}
|
||||
{`
|
||||
.react-syntax-highlighter-line-number {
|
||||
user-select: none;
|
||||
@ -94,10 +137,7 @@ function RenderMarkdownComponent({
|
||||
{getReadableLanguageName(language)}
|
||||
</span>
|
||||
<button
|
||||
onClick={(e) => {
|
||||
e.stopPropagation()
|
||||
handleCopy(code, codeId)
|
||||
}}
|
||||
onClick={handleCopyClick}
|
||||
className="flex items-center gap-1 text-xs font-sans transition-colors cursor-pointer"
|
||||
>
|
||||
{copiedId === codeId ? (
|
||||
@ -114,7 +154,6 @@ function RenderMarkdownComponent({
|
||||
</button>
|
||||
</div>
|
||||
<SyntaxHighlighter
|
||||
// @ts-expect-error - Type issues with style prop in react-syntax-highlighter
|
||||
style={
|
||||
prismStyles[
|
||||
codeBlockStyle
|
||||
@ -130,7 +169,6 @@ function RenderMarkdownComponent({
|
||||
language={language}
|
||||
showLineNumbers={showLineNumbers}
|
||||
wrapLines={true}
|
||||
// Temporary comment we try calculate main area width on __root
|
||||
lineProps={
|
||||
isWrapping
|
||||
? {
|
||||
@ -149,40 +187,82 @@ function RenderMarkdownComponent({
|
||||
CodeTag={'code'}
|
||||
{...props}
|
||||
>
|
||||
{String(children).replace(/\n$/, '')}
|
||||
{code}
|
||||
</SyntaxHighlighter>
|
||||
</div>
|
||||
) : (
|
||||
<code className={cn(className)}>{children}</code>
|
||||
)
|
||||
},
|
||||
}),
|
||||
[codeBlockStyle, showLineNumbers, copiedId]
|
||||
}
|
||||
)
|
||||
|
||||
// Memoize the remarkPlugins to prevent unnecessary re-renders
|
||||
CodeComponent.displayName = 'CodeComponent'
|
||||
|
||||
function RenderMarkdownComponent({
|
||||
content,
|
||||
enableRawHtml,
|
||||
className,
|
||||
isUser,
|
||||
components,
|
||||
isWrapping,
|
||||
}: MarkdownProps) {
|
||||
const { codeBlockStyle, showLineNumbers } = useCodeblock()
|
||||
|
||||
// State for tracking which code block has been copied
|
||||
const [copiedId, setCopiedId] = useState<string | null>(null)
|
||||
|
||||
// Memoized copy handler
|
||||
const handleCopy = useCallback((code: string, id: string) => {
|
||||
navigator.clipboard.writeText(code)
|
||||
setCopiedId(id)
|
||||
|
||||
// Reset copied state after 2 seconds
|
||||
setTimeout(() => {
|
||||
setCopiedId(null)
|
||||
}, 2000)
|
||||
}, [])
|
||||
|
||||
// Memoize the normalized content to avoid reprocessing on every render
|
||||
const normalizedContent = useMemo(() => normalizeLatex(content), [content])
|
||||
|
||||
// Stable remarkPlugins reference
|
||||
const remarkPlugins = useMemo(() => {
|
||||
// Using a simpler configuration to avoid TypeScript errors
|
||||
const basePlugins = [remarkGfm, remarkMath, remarkEmoji]
|
||||
// Add remark-breaks for user messages to handle single newlines as line breaks
|
||||
if (isUser) {
|
||||
basePlugins.push(remarkBreaks)
|
||||
}
|
||||
return basePlugins
|
||||
}, [isUser])
|
||||
|
||||
// Memoize the rehypePlugins to prevent unnecessary re-renders
|
||||
// Stable rehypePlugins reference
|
||||
const rehypePlugins = useMemo(() => {
|
||||
return enableRawHtml ? [rehypeKatex, rehypeRaw] : [rehypeKatex]
|
||||
}, [enableRawHtml])
|
||||
|
||||
// Merge custom components with default components
|
||||
const mergedComponents = useMemo(
|
||||
// Memoized components with stable references
|
||||
const markdownComponents: Components = useMemo(
|
||||
() => ({
|
||||
...defaultComponents,
|
||||
code: (props) => (
|
||||
<CodeComponent
|
||||
{...props}
|
||||
isUser={isUser}
|
||||
codeBlockStyle={codeBlockStyle}
|
||||
showLineNumbers={showLineNumbers}
|
||||
isWrapping={isWrapping}
|
||||
onCopy={handleCopy}
|
||||
copiedId={copiedId}
|
||||
/>
|
||||
),
|
||||
// Add other optimized components if needed
|
||||
...components,
|
||||
}),
|
||||
[defaultComponents, components]
|
||||
[
|
||||
isUser,
|
||||
codeBlockStyle,
|
||||
showLineNumbers,
|
||||
isWrapping,
|
||||
handleCopy,
|
||||
copiedId,
|
||||
components,
|
||||
]
|
||||
)
|
||||
|
||||
// Render the markdown content
|
||||
@ -197,14 +277,14 @@ function RenderMarkdownComponent({
|
||||
<ReactMarkdown
|
||||
remarkPlugins={remarkPlugins}
|
||||
rehypePlugins={rehypePlugins}
|
||||
components={mergedComponents}
|
||||
components={markdownComponents}
|
||||
>
|
||||
{content}
|
||||
{normalizedContent}
|
||||
</ReactMarkdown>
|
||||
</div>
|
||||
)
|
||||
}
|
||||
|
||||
// Use a simple memo without custom comparison to allow re-renders when content changes
|
||||
// This is important for streaming content to render incrementally
|
||||
export const RenderMarkdown = memo(RenderMarkdownComponent)
|
||||
/**
 * Memoized markdown renderer.
 *
 * The comparator returns `true` (skip the re-render) only when `content` and
 * every primitive display prop are unchanged. Comparing `content` alone left
 * the output stale when `isUser`, `isWrapping`, `enableRawHtml` or `className`
 * changed while the text stayed the same, because the component body reads
 * those props to choose plugins and to configure code blocks.
 *
 * NOTE(review): `components` is still not compared, so callers that pass a
 * fresh inline object on every render keep the memoization benefit. The
 * trade-off is that swapping `components` without changing `content` will not
 * trigger a re-render; confirm no caller relies on that.
 */
export const RenderMarkdown = memo(
  RenderMarkdownComponent,
  (prevProps, nextProps) =>
    prevProps.content === nextProps.content &&
    prevProps.enableRawHtml === nextProps.enableRawHtml &&
    prevProps.className === nextProps.className &&
    prevProps.isUser === nextProps.isUser &&
    prevProps.isWrapping === nextProps.isWrapping
)
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user