From 3c2ba624ed5c1831fa2aa58e4dab1d9753106688 Mon Sep 17 00:00:00 2001 From: Akarshan Date: Tue, 28 Oct 2025 18:43:09 +0530 Subject: [PATCH] feat: Add image visualization for tool_output steps Implement support for displaying images returned in the Model Context Protocol (MCP) content format within the `tool_output` step of the ReAct thinking block. This change: - Safely parses `tool_output` content to detect and extract image data (base64). - Renders images as clickable thumbnails using data URLs. - Integrates `ImageModal` to allow users to view the generated images in full size. --- web-app/src/containers/ThinkingBlock.tsx | 108 +++++++++++++++++++---- 1 file changed, 90 insertions(+), 18 deletions(-) diff --git a/web-app/src/containers/ThinkingBlock.tsx b/web-app/src/containers/ThinkingBlock.tsx index b5545a431..4f1e8bdeb 100644 --- a/web-app/src/containers/ThinkingBlock.tsx +++ b/web-app/src/containers/ThinkingBlock.tsx @@ -4,8 +4,9 @@ import { ChevronDown, ChevronUp, Loader, Check } from 'lucide-react' import { create } from 'zustand' import { RenderMarkdown } from './RenderMarkdown' import { useTranslation } from '@/i18n/react-i18next-compat' -import { useMemo } from 'react' +import { useMemo, useState } from 'react' import { cn } from '@/lib/utils' +import ImageModal from '@/containers/dialogs/ImageModal' // Define ReActStep type (Reasoning-Action Step) type ReActStep = { @@ -23,6 +24,21 @@ interface Props { duration?: number } +// Utility function to safely parse JSON +const safeParseJSON = (text: string) => { + try { + return JSON.parse(text) + } catch { + return null + } +} + +// Utility to create data URL for images +const createDataUrl = (base64Data: string, mimeType: string): string => { + if (base64Data.startsWith('data:')) return base64Data + return `data:${mimeType};base64,${base64Data}` +} + // Zustand store for thinking block state type ThinkingBlockState = { thinkingState: { [id: string]: boolean } @@ -58,6 +74,15 @@ const ThinkingBlock = ({ const 
setThinkingState = useThinkingStore((state) => state.setThinkingState) const { t } = useTranslation() + // Move useState for modal management to the top level of the component + const [modalImage, setModalImage] = useState<{ + url: string + alt: string + } | null>(null) + const closeModal = () => setModalImage(null) + const handleImageClick = (url: string, alt: string) => + setModalImage({ url, alt }) + // Actual loading state comes from prop, determined by whether final text started streaming (Req 2) const loading = propLoading @@ -108,7 +133,12 @@ const ThinkingBlock = ({ } // --- Rendering Functions for Expanded View --- - const renderStepContent = (step: ReActStep, index: number) => { + const renderStepContent = ( + step: ReActStep, + index: number, + handleImageClick: (url: string, alt: string) => void, + t: (key: string) => string + ) => { // Updated type if (step.type === 'done') { const timeInSeconds = formatDuration(step.time ?? 0) @@ -131,7 +161,14 @@ const ThinkingBlock = ({ ) } - let contentDisplay + const parsed = safeParseJSON(step.content) + const mcpContent = parsed?.content ?? [] + const hasImages = + Array.isArray(mcpContent) && + mcpContent.some((c) => c.type === 'image' && c.data && c.mimeType) + + let contentDisplay: React.ReactNode + if (step.type === 'tool_call') { const args = step.metadata ? step.metadata : '' contentDisplay = ( @@ -150,19 +187,52 @@ const ThinkingBlock = ({ ) } else if (step.type === 'tool_output') { - contentDisplay = ( - <> -

Tool Output:

-
- -
- - ) + if (hasImages) { + // Display each image + contentDisplay = ( + <> +

+ Tool Output (Images): +

+
+ {mcpContent.map((item: any, index: number) => + item.type === 'image' && item.data && item.mimeType ? ( +
+ {`MCP (e.currentTarget.style.display = 'none')} + onClick={() => + handleImageClick( + createDataUrl(item.data, item.mimeType), + `MCP Image ${index + 1}` + ) + } + /> +
+ ) : null + )} +
+ + ) + } else { + // Default behavior: wrap text in code block if no backticks + let content = step.content.substring(0, 1000) + if (!content.includes('```')) { + content = '```json\n' + content + '\n```' + } + + contentDisplay = ( + <> +

Tool Output:

+
+ +
+ + ) + } } else { - // reasoning contentDisplay = ( ) @@ -175,7 +245,7 @@ const ThinkingBlock = ({ ) } - const headerTitle = useMemo(() => { + const headerTitle: string = useMemo(() => { // Check if any step was a tool call const hasToolCalls = steps.some((step) => step.type === 'tool_call') const hasReasoning = steps.some((step) => step.type === 'reasoning') @@ -255,7 +325,7 @@ const ThinkingBlock = ({ )} /> {/* Active step content */} - {renderStepContent(activeStep, N - 1)} + {renderStepContent(activeStep, N - 1, handleImageClick, t)} @@ -285,13 +355,15 @@ const ThinkingBlock = ({ /> {/* Step Content */} - {renderStepContent(step, index)} + {renderStepContent(step, index, handleImageClick, t)} ))} )} + {/* Render ImageModal once at the top level */} + ) }