feat: normalize LaTeX fragments in markdown rendering (#6488)

* feat: normalize LaTeX fragments in markdown rendering

Added a preprocessing step that converts the LaTeX delimiters `\[...\]` to `$$...$$` and `\(...\)` to `$...$` before rendering. The function skips fenced code blocks, inline code, and HTML tags to avoid unintended transformations. This improves the authoring experience by supporting common LaTeX syntax without requiring explicit `$` delimiters.

* fix: correct inline LaTeX normalization replacement

The replacement function for inline math (`\(...\)`) incorrectly accepted a fourth
parameter (`post`) and appended it to the result, which could introduce stray
characters or `undefined` into the rendered output. Updated the function to
use only the captured prefix and inner content and removed the extraneous
`${post}` interpolation, ensuring clean LaTeX conversion.

* feat: optimize markdown rendering with LaTeX caching and memoized code blocks

- Added cache to normalizeLatex to avoid reprocessing repeated content
- Introduced CodeComponent with stable IDs and memoization to reduce re-renders
- Replaced per-render code block ID mapping with hash-based IDs
- Memoized copy handler and normalized markdown content
- Simplified plugin/component setup with stable references
- Added custom comparison for RenderMarkdown memoization to prevent unnecessary updates

* refactor: memoize content only

---------

Co-authored-by: Louis <louis@jan.ai>
This commit is contained in:
Akarshan Biswas 2025-09-24 07:19:32 +05:30 committed by GitHub
parent c46e13b8b1
commit 38ad8deae2
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194

View File

@ -7,7 +7,7 @@ import remarkBreaks from 'remark-breaks'
import rehypeKatex from 'rehype-katex'
import { Prism as SyntaxHighlighter } from 'react-syntax-highlighter'
import * as prismStyles from 'react-syntax-highlighter/dist/cjs/styles/prism'
import { memo, useState, useMemo, useRef, useEffect } from 'react'
import { memo, useState, useMemo, useCallback } from 'react'
import { getReadableLanguageName } from '@/lib/utils'
import { cn } from '@/lib/utils'
import { useCodeblock } from '@/hooks/useCodeblock'
@ -25,61 +25,104 @@ interface MarkdownProps {
isWrapping?: boolean
}
function RenderMarkdownComponent({
content,
enableRawHtml,
className,
isUser,
components,
isWrapping,
}: MarkdownProps) {
const { t } = useTranslation()
const { codeBlockStyle, showLineNumbers } = useCodeblock()
// Cache for normalized LaTeX content: each key is a raw input string and the
// value is the string normalizeLatex produced for it. normalizeLatex both reads
// and fills this map, and evicts the first-inserted key once the map grows
// past its size limit.
const latexCache = new Map<string, string>()
// State for tracking which code block has been copied
const [copiedId, setCopiedId] = useState<string | null>(null)
// Map to store unique IDs for code blocks based on content and position
const codeBlockIds = useRef(new Map<string, string>())
// Clear ID map when content changes
useEffect(() => {
codeBlockIds.current.clear()
}, [content])
// Function to handle copying code to clipboard
const handleCopy = (code: string, id: string) => {
navigator.clipboard.writeText(code)
setCopiedId(id)
// Reset copied state after 2 seconds
setTimeout(() => {
setCopiedId(null)
}, 2000)
/** Upper bound on the number of entries kept in latexCache. */
const LATEX_CACHE_MAX_ENTRIES = 100

/**
 * Matches the spans that must never be rewritten: fenced code blocks, inline
 * code, and HTML tags. The tag alternative requires a tag name right after
 * `<` (optionally preceded by `/`) followed by whitespace, `/` or `>`, so that
 * comparison operators inside math such as `\(a < b\) and \(c > d\)` are not
 * mistaken for a tag.
 */
const PROTECTED_SEGMENT_PATTERN =
  /(```[\s\S]*?```|`[^`]*`|<\/?[a-zA-Z][\w:.-]*(?:\s[^>]*)?\/?>)/g

/** Display math: `\[` and `\]` each on their own line. */
const DISPLAY_MATH_PATTERN = /(^|\n)\\\[\s*\n([\s\S]*?)\n\s*\\\](?=\n|$)/g

/** Inline math: `\( ... \)` on a single line, not adjacent to `$` or `\`. */
const INLINE_MATH_PATTERN = /(^|[^$\\])\\\((.+?)\\\)(?=[^$\\]|$)/g

/**
 * Preprocessor that normalizes LaTeX delimiters into the `$` / `$$` form:
 * `\[ ... \]` blocks become `$$ ... $$` and `\( ... \)` becomes `$ ... $`.
 * Fenced code blocks, inline code and HTML tags are passed through untouched.
 *
 * Results are memoized in `latexCache`, keyed by the exact input string.
 *
 * @param input - Raw markdown text.
 * @returns The markdown text with LaTeX delimiters normalized.
 */
const normalizeLatex = (input: string): string => {
  const cached = latexCache.get(input)
  if (cached !== undefined) {
    return cached
  }

  const result = input
    .split(PROTECTED_SEGMENT_PATTERN)
    .map((segment, index) => {
      // split() with a single capture group yields
      // [text, protected, text, protected, ...]; odd indexes are the
      // protected spans and are emitted verbatim.
      if (index % 2 === 1) return segment

      return segment
        .replace(
          DISPLAY_MATH_PATTERN,
          (_match: string, pre: string, inner: string) =>
            `${pre}$$\n${inner.trim()}\n$$`
        )
        .replace(
          INLINE_MATH_PATTERN,
          (_match: string, pre: string, inner: string) =>
            `${pre}$${inner.trim()}$`
        )
    })
    .join('')

  // Keep the cache bounded. Map iterates in insertion order, so the first key
  // is the oldest entry.
  if (latexCache.size >= LATEX_CACHE_MAX_ENTRIES) {
    const oldest = latexCache.keys().next()
    if (!oldest.done) {
      latexCache.delete(oldest.value)
    }
  }
  latexCache.set(input, result)
  return result
}
// Memoized code component to prevent unnecessary re-renders
const CodeComponent = memo(
({
className,
children,
isUser,
codeBlockStyle,
showLineNumbers,
isWrapping,
onCopy,
copiedId,
...props
}: any) => {
const { t } = useTranslation()
const match = /language-(\w+)/.exec(className || '')
const language = match ? match[1] : ''
const isInline = !match || !language
const code = String(children).replace(/\n$/, '')
// Generate a unique ID based on content and language
const contentKey = `${code}-${language}`
let codeId = codeBlockIds.current.get(contentKey)
if (!codeId) {
codeId = `code-${codeBlockIds.current.size}`
codeBlockIds.current.set(contentKey, codeId)
// Generate a stable ID based on content hash instead of position
const codeId = useMemo(() => {
let hash = 0
for (let i = 0; i < code.length; i++) {
const char = code.charCodeAt(i)
hash = (hash << 5) - hash + char
hash = hash & hash // Convert to 32-bit integer
}
return `code-${Math.abs(hash)}-${language}`
}, [code, language])
const handleCopyClick = useCallback(
(e: React.MouseEvent) => {
e.stopPropagation()
onCopy(code, codeId)
},
[code, codeId, onCopy]
)
if (isInline || isUser) {
return <code className={cn(className)}>{children}</code>
}
return !isInline && !isUser ? (
return (
<div className="relative overflow-hidden border rounded-md border-main-view-fg/2">
<style>
{/* Disable selection of line numbers. React Syntax Highlighter currently has
unfixed bug so we can't use the lineNumberContainerStyleProp */}
{`
.react-syntax-highlighter-line-number {
user-select: none;
@ -94,10 +137,7 @@ function RenderMarkdownComponent({
{getReadableLanguageName(language)}
</span>
<button
onClick={(e) => {
e.stopPropagation()
handleCopy(code, codeId)
}}
onClick={handleCopyClick}
className="flex items-center gap-1 text-xs font-sans transition-colors cursor-pointer"
>
{copiedId === codeId ? (
@ -114,7 +154,6 @@ function RenderMarkdownComponent({
</button>
</div>
<SyntaxHighlighter
// @ts-expect-error - Type issues with style prop in react-syntax-highlighter
style={
prismStyles[
codeBlockStyle
@ -130,7 +169,6 @@ function RenderMarkdownComponent({
language={language}
showLineNumbers={showLineNumbers}
wrapLines={true}
// Temporary comment we try calculate main area width on __root
lineProps={
isWrapping
? {
@ -149,40 +187,82 @@ function RenderMarkdownComponent({
CodeTag={'code'}
{...props}
>
{String(children).replace(/\n$/, '')}
{code}
</SyntaxHighlighter>
</div>
) : (
<code className={cn(className)}>{children}</code>
)
},
}),
[codeBlockStyle, showLineNumbers, copiedId]
}
)
// Memoize the remarkPlugins to prevent unnecessary re-renders
CodeComponent.displayName = 'CodeComponent'
function RenderMarkdownComponent({
content,
enableRawHtml,
className,
isUser,
components,
isWrapping,
}: MarkdownProps) {
const { codeBlockStyle, showLineNumbers } = useCodeblock()
// State for tracking which code block has been copied
const [copiedId, setCopiedId] = useState<string | null>(null)
// Memoized copy handler
const handleCopy = useCallback((code: string, id: string) => {
navigator.clipboard.writeText(code)
setCopiedId(id)
// Reset copied state after 2 seconds
setTimeout(() => {
setCopiedId(null)
}, 2000)
}, [])
// Memoize the normalized content to avoid reprocessing on every render
const normalizedContent = useMemo(() => normalizeLatex(content), [content])
// Stable remarkPlugins reference
const remarkPlugins = useMemo(() => {
// Using a simpler configuration to avoid TypeScript errors
const basePlugins = [remarkGfm, remarkMath, remarkEmoji]
// Add remark-breaks for user messages to handle single newlines as line breaks
if (isUser) {
basePlugins.push(remarkBreaks)
}
return basePlugins
}, [isUser])
// Memoize the rehypePlugins to prevent unnecessary re-renders
// Stable rehypePlugins reference
const rehypePlugins = useMemo(() => {
return enableRawHtml ? [rehypeKatex, rehypeRaw] : [rehypeKatex]
}, [enableRawHtml])
// Merge custom components with default components
const mergedComponents = useMemo(
// Memoized components with stable references
const markdownComponents: Components = useMemo(
() => ({
...defaultComponents,
code: (props) => (
<CodeComponent
{...props}
isUser={isUser}
codeBlockStyle={codeBlockStyle}
showLineNumbers={showLineNumbers}
isWrapping={isWrapping}
onCopy={handleCopy}
copiedId={copiedId}
/>
),
// Add other optimized components if needed
...components,
}),
[defaultComponents, components]
[
isUser,
codeBlockStyle,
showLineNumbers,
isWrapping,
handleCopy,
copiedId,
components,
]
)
// Render the markdown content
@ -197,14 +277,14 @@ function RenderMarkdownComponent({
<ReactMarkdown
remarkPlugins={remarkPlugins}
rehypePlugins={rehypePlugins}
components={mergedComponents}
components={markdownComponents}
>
{content}
{normalizedContent}
</ReactMarkdown>
</div>
)
}
// Use a simple memo without custom comparison to allow re-renders when content changes
// This is important for streaming content to render incrementally
export const RenderMarkdown = memo(RenderMarkdownComponent)
/**
 * Memoized markdown renderer.
 *
 * The comparator must cover every prop the component reads; comparing only
 * `content` would leave the output stale when, for example, `isWrapping` or
 * `enableRawHtml` is toggled while the text stays the same. `components` is
 * compared by reference, so callers that want to skip re-renders should pass
 * a stable object.
 */
export const RenderMarkdown = memo(
  RenderMarkdownComponent,
  (prevProps, nextProps) =>
    prevProps.content === nextProps.content &&
    prevProps.enableRawHtml === nextProps.enableRawHtml &&
    prevProps.className === nextProps.className &&
    prevProps.isUser === nextProps.isUser &&
    prevProps.isWrapping === nextProps.isWrapping &&
    prevProps.components === nextProps.components
)