feat: Add image visualization for tool_output steps

Implement support for displaying images returned as Model Context Protocol (MCP) content parts within the `tool_output` step of the ReAct thinking block.

This change:
- Safely parses `tool_output` content to detect and extract image data (base64).
- Renders images as clickable thumbnails using data URLs.
- Integrates `ImageModal` to allow users to view the generated images in full size.
This commit is contained in:
Akarshan 2025-10-28 18:43:09 +05:30
parent a3bfef0f24
commit 3c2ba624ed
No known key found for this signature in database
GPG Key ID: D75C9634A870665F

View File

@ -4,8 +4,9 @@ import { ChevronDown, ChevronUp, Loader, Check } from 'lucide-react'
import { create } from 'zustand' import { create } from 'zustand'
import { RenderMarkdown } from './RenderMarkdown' import { RenderMarkdown } from './RenderMarkdown'
import { useTranslation } from '@/i18n/react-i18next-compat' import { useTranslation } from '@/i18n/react-i18next-compat'
import { useMemo } from 'react' import { useMemo, useState } from 'react'
import { cn } from '@/lib/utils' import { cn } from '@/lib/utils'
import ImageModal from '@/containers/dialogs/ImageModal'
// Define ReActStep type (Reasoning-Action Step) // Define ReActStep type (Reasoning-Action Step)
type ReActStep = { type ReActStep = {
@ -23,6 +24,21 @@ interface Props {
duration?: number duration?: number
} }
/**
 * Parses `text` as JSON without ever throwing.
 *
 * @param text - Raw string that may or may not contain valid JSON.
 * @returns The value produced by `JSON.parse`, or `null` when parsing fails.
 *   A literal JSON `null` input also yields `null`, so callers cannot tell
 *   the two cases apart from the result alone.
 */
const safeParseJSON = (text: string) => {
  try {
    const parsedValue = JSON.parse(text)
    return parsedValue
  } catch {
    // Malformed JSON is an expected input here; signal it with null.
    return null
  }
}
/**
 * Builds a `data:` URL for an image payload.
 *
 * @param base64Data - Base64-encoded bytes, or a string that already starts
 *   with `data:` (returned unchanged in that case).
 * @param mimeType - MIME type placed in the URL header, e.g. `image/png`.
 * @returns A string of the form `data:<mimeType>;base64,<base64Data>`, or
 *   `base64Data` itself when it already begins with `data:`.
 */
const createDataUrl = (base64Data: string, mimeType: string): string =>
  base64Data.startsWith('data:')
    ? base64Data
    : 'data:' + mimeType + ';base64,' + base64Data
// Zustand store for thinking block state // Zustand store for thinking block state
type ThinkingBlockState = { type ThinkingBlockState = {
thinkingState: { [id: string]: boolean } thinkingState: { [id: string]: boolean }
@ -58,6 +74,15 @@ const ThinkingBlock = ({
const setThinkingState = useThinkingStore((state) => state.setThinkingState) const setThinkingState = useThinkingStore((state) => state.setThinkingState)
const { t } = useTranslation() const { t } = useTranslation()
// Move useState for modal management to the top level of the component
const [modalImage, setModalImage] = useState<{
url: string
alt: string
} | null>(null)
const closeModal = () => setModalImage(null)
const handleImageClick = (url: string, alt: string) =>
setModalImage({ url, alt })
// Actual loading state comes from prop, determined by whether final text started streaming (Req 2) // Actual loading state comes from prop, determined by whether final text started streaming (Req 2)
const loading = propLoading const loading = propLoading
@ -108,7 +133,12 @@ const ThinkingBlock = ({
} }
// --- Rendering Functions for Expanded View --- // --- Rendering Functions for Expanded View ---
const renderStepContent = (step: ReActStep, index: number) => { const renderStepContent = (
step: ReActStep,
index: number,
handleImageClick: (url: string, alt: string) => void,
t: (key: string) => string
) => {
// Updated type // Updated type
if (step.type === 'done') { if (step.type === 'done') {
const timeInSeconds = formatDuration(step.time ?? 0) const timeInSeconds = formatDuration(step.time ?? 0)
@ -131,7 +161,14 @@ const ThinkingBlock = ({
) )
} }
let contentDisplay const parsed = safeParseJSON(step.content)
const mcpContent = parsed?.content ?? []
const hasImages =
Array.isArray(mcpContent) &&
mcpContent.some((c) => c.type === 'image' && c.data && c.mimeType)
let contentDisplay: React.ReactNode
if (step.type === 'tool_call') { if (step.type === 'tool_call') {
const args = step.metadata ? step.metadata : '' const args = step.metadata ? step.metadata : ''
contentDisplay = ( contentDisplay = (
@ -150,19 +187,52 @@ const ThinkingBlock = ({
</> </>
) )
} else if (step.type === 'tool_output') { } else if (step.type === 'tool_output') {
contentDisplay = ( if (hasImages) {
<> // Display each image
<p className="font-medium text-main-view-fg/90">Tool Output:</p> contentDisplay = (
<div className="mt-1"> <>
<RenderMarkdown <p className="font-medium text-main-view-fg/90">
isWrapping={true} Tool Output (Images):
content={step.content.substring(0, 1000)} </p>
/> <div className="mt-2 space-y-2">
</div> {mcpContent.map((item: any, index: number) =>
</> item.type === 'image' && item.data && item.mimeType ? (
) <div key={index} className="my-2">
<img
src={createDataUrl(item.data, item.mimeType)}
alt={`MCP Image ${index + 1}`}
className="max-w-full max-h-64 object-contain rounded-md border border-main-view-fg/10 cursor-pointer hover:opacity-80 transition-opacity"
onError={(e) => (e.currentTarget.style.display = 'none')}
onClick={() =>
handleImageClick(
createDataUrl(item.data, item.mimeType),
`MCP Image ${index + 1}`
)
}
/>
</div>
) : null
)}
</div>
</>
)
} else {
// Default behavior: wrap text in code block if no backticks
let content = step.content.substring(0, 1000)
if (!content.includes('```')) {
content = '```json\n' + content + '\n```'
}
contentDisplay = (
<>
<p className="font-medium text-main-view-fg/90">Tool Output:</p>
<div className="mt-1">
<RenderMarkdown isWrapping={true} content={content} />
</div>
</>
)
}
} else { } else {
// reasoning
contentDisplay = ( contentDisplay = (
<RenderMarkdown isWrapping={true} content={step.content} /> <RenderMarkdown isWrapping={true} content={step.content} />
) )
@ -175,7 +245,7 @@ const ThinkingBlock = ({
) )
} }
const headerTitle = useMemo(() => { const headerTitle: string = useMemo(() => {
// Check if any step was a tool call // Check if any step was a tool call
const hasToolCalls = steps.some((step) => step.type === 'tool_call') const hasToolCalls = steps.some((step) => step.type === 'tool_call')
const hasReasoning = steps.some((step) => step.type === 'reasoning') const hasReasoning = steps.some((step) => step.type === 'reasoning')
@ -255,7 +325,7 @@ const ThinkingBlock = ({
)} )}
/> />
{/* Active step content */} {/* Active step content */}
{renderStepContent(activeStep, N - 1)} {renderStepContent(activeStep, N - 1, handleImageClick, t)}
</div> </div>
</div> </div>
</div> </div>
@ -285,13 +355,15 @@ const ThinkingBlock = ({
/> />
{/* Step Content */} {/* Step Content */}
{renderStepContent(step, index)} {renderStepContent(step, index, handleImageClick, t)}
</div> </div>
))} ))}
</div> </div>
</div> </div>
)} )}
</div> </div>
{/* Render ImageModal once at the top level */}
<ImageModal image={modalImage} onClose={closeModal} />
</div> </div>
) )
} }