feat: Add image visualization for tool_output steps

Implement support for displaying images returned in Model Context Protocol (MCP) tool results within the `tool_output` step of the ReAct thinking block.

This change:
- Safely parses `tool_output` content to detect and extract image data (base64).
- Renders images as clickable thumbnails using data URLs.
- Integrates `ImageModal` to allow users to view the generated images in full size.
This commit is contained in:
Akarshan 2025-10-28 18:43:09 +05:30
parent a3bfef0f24
commit 3c2ba624ed
No known key found for this signature in database
GPG Key ID: D75C9634A870665F

View File

@ -4,8 +4,9 @@ import { ChevronDown, ChevronUp, Loader, Check } from 'lucide-react'
import { create } from 'zustand'
import { RenderMarkdown } from './RenderMarkdown'
import { useTranslation } from '@/i18n/react-i18next-compat'
import { useMemo } from 'react'
import { useMemo, useState } from 'react'
import { cn } from '@/lib/utils'
import ImageModal from '@/containers/dialogs/ImageModal'
// Define ReActStep type (Reasoning-Action Step)
type ReActStep = {
@ -23,6 +24,21 @@ interface Props {
duration?: number
}
/**
 * Parses a JSON string without throwing.
 *
 * @param text - Raw string that may or may not contain valid JSON.
 * @returns The value produced by `JSON.parse`, or `null` when parsing fails.
 *   A literal JSON `null` input also yields `null`.
 */
const safeParseJSON = (text: string) => {
  // Start from the failure value so there is a single exit point.
  let parsed: ReturnType<typeof JSON.parse> = null
  try {
    parsed = JSON.parse(text)
  } catch {
    // Invalid JSON: keep `parsed` as null.
  }
  return parsed
}
/**
 * Builds a `data:` URL for an image payload.
 *
 * @param base64Data - Base64 payload, or a string that already begins with `data:`.
 * @param mimeType - MIME type inserted into the generated URL.
 * @returns `base64Data` unchanged when it already starts with `data:`;
 *   otherwise `data:<mimeType>;base64,<base64Data>`.
 */
const createDataUrl = (base64Data: string, mimeType: string): string => {
  const alreadyDataUrl = base64Data.startsWith('data:')
  return alreadyDataUrl
    ? base64Data
    : ['data:', mimeType, ';base64,', base64Data].join('')
}
// Zustand store for thinking block state
type ThinkingBlockState = {
thinkingState: { [id: string]: boolean }
@ -58,6 +74,15 @@ const ThinkingBlock = ({
const setThinkingState = useThinkingStore((state) => state.setThinkingState)
const { t } = useTranslation()
// Modal state is kept at the top level of the component so a single ImageModal can show an image from any step
const [modalImage, setModalImage] = useState<{
url: string
alt: string
} | null>(null)
const closeModal = () => setModalImage(null)
const handleImageClick = (url: string, alt: string) =>
setModalImage({ url, alt })
// Actual loading state comes from prop, determined by whether final text started streaming (Req 2)
const loading = propLoading
@ -108,7 +133,12 @@ const ThinkingBlock = ({
}
// --- Rendering Functions for Expanded View ---
const renderStepContent = (step: ReActStep, index: number) => {
const renderStepContent = (
step: ReActStep,
index: number,
handleImageClick: (url: string, alt: string) => void,
t: (key: string) => string
) => {
// Updated type
if (step.type === 'done') {
const timeInSeconds = formatDuration(step.time ?? 0)
@ -131,7 +161,14 @@ const ThinkingBlock = ({
)
}
let contentDisplay
const parsed = safeParseJSON(step.content)
const mcpContent = parsed?.content ?? []
const hasImages =
Array.isArray(mcpContent) &&
mcpContent.some((c) => c.type === 'image' && c.data && c.mimeType)
let contentDisplay: React.ReactNode
if (step.type === 'tool_call') {
const args = step.metadata ? step.metadata : ''
contentDisplay = (
@ -150,19 +187,52 @@ const ThinkingBlock = ({
</>
)
} else if (step.type === 'tool_output') {
contentDisplay = (
<>
<p className="font-medium text-main-view-fg/90">Tool Output:</p>
<div className="mt-1">
<RenderMarkdown
isWrapping={true}
content={step.content.substring(0, 1000)}
/>
</div>
</>
)
if (hasImages) {
// Display each image
contentDisplay = (
<>
<p className="font-medium text-main-view-fg/90">
Tool Output (Images):
</p>
<div className="mt-2 space-y-2">
{mcpContent.map((item: any, index: number) =>
item.type === 'image' && item.data && item.mimeType ? (
<div key={index} className="my-2">
<img
src={createDataUrl(item.data, item.mimeType)}
alt={`MCP Image ${index + 1}`}
className="max-w-full max-h-64 object-contain rounded-md border border-main-view-fg/10 cursor-pointer hover:opacity-80 transition-opacity"
onError={(e) => (e.currentTarget.style.display = 'none')}
onClick={() =>
handleImageClick(
createDataUrl(item.data, item.mimeType),
`MCP Image ${index + 1}`
)
}
/>
</div>
) : null
)}
</div>
</>
)
} else {
// Default behavior: wrap text in code block if no backticks
let content = step.content.substring(0, 1000)
if (!content.includes('```')) {
content = '```json\n' + content + '\n```'
}
contentDisplay = (
<>
<p className="font-medium text-main-view-fg/90">Tool Output:</p>
<div className="mt-1">
<RenderMarkdown isWrapping={true} content={content} />
</div>
</>
)
}
} else {
// reasoning
contentDisplay = (
<RenderMarkdown isWrapping={true} content={step.content} />
)
@ -175,7 +245,7 @@ const ThinkingBlock = ({
)
}
const headerTitle = useMemo(() => {
const headerTitle: string = useMemo(() => {
// Check if any step was a tool call
const hasToolCalls = steps.some((step) => step.type === 'tool_call')
const hasReasoning = steps.some((step) => step.type === 'reasoning')
@ -255,7 +325,7 @@ const ThinkingBlock = ({
)}
/>
{/* Active step content */}
{renderStepContent(activeStep, N - 1)}
{renderStepContent(activeStep, N - 1, handleImageClick, t)}
</div>
</div>
</div>
@ -285,13 +355,15 @@ const ThinkingBlock = ({
/>
{/* Step Content */}
{renderStepContent(step, index)}
{renderStepContent(step, index, handleImageClick, t)}
</div>
))}
</div>
</div>
)}
</div>
{/* Render ImageModal once at the top level */}
<ImageModal image={modalImage} onClose={closeModal} />
</div>
)
}