feat: Add image visualization for tool_output steps

Implement support for displaying images returned in the Model Context Protocol (MCP) content format within the `tool_output` step of the ReAct thinking block. This change: (1) safely parses `tool_output` content to detect and extract base64-encoded image data; (2) renders the images as clickable thumbnails using data URLs; and (3) integrates `ImageModal` so users can view the generated images at full size.
2025-10-28 18:43:09 +05:30 · 2025-10-28 18:43:09 +05:30 · 3c2ba624ed
commit 3c2ba624ed
parent a3bfef0f24
1 changed files with 90 additions and 18 deletions
--- a/web-app/src/containers/ThinkingBlock.tsx
+++ b/web-app/src/containers/ThinkingBlock.tsx
@ -4,8 +4,9 @@ import { ChevronDown, ChevronUp, Loader, Check } from 'lucide-react'
 import { create } from 'zustand'
 import { RenderMarkdown } from './RenderMarkdown'
 import { useTranslation } from '@/i18n/react-i18next-compat'
-import { useMemo } from 'react'
+import { useMemo, useState } from 'react'
 import { cn } from '@/lib/utils'
 import ImageModal from '@/containers/dialogs/ImageModal'
 // Define ReActStep type (Reasoning-Action Step)
 type ReActStep = {
@ -23,6 +24,21 @@ interface Props {
  duration?: number
 }
 // Decode `text` as JSON without ever throwing.
 // Yields the decoded value, or null when `text` is not valid JSON
 // (the parse error itself is intentionally discarded).
 const safeParseJSON = (text: string) => {
  // Same type JSON.parse produces, so callers see an unchanged return type
  let decoded: ReturnType<typeof JSON.parse> = null
  try {
    decoded = JSON.parse(text)
  } catch {
    // Malformed input: keep the null fallback
  }
  return decoded
 }
 // Build a `data:` URL from a base64 payload and its MIME type.
 // A payload that already begins with `data:` is returned unchanged,
 // so an existing data URL is never wrapped a second time.
 const createDataUrl = (base64Data: string, mimeType: string): string => {
  const isAlreadyDataUrl = base64Data.startsWith('data:')
  return isAlreadyDataUrl
    ? base64Data
    : `data:${mimeType};base64,${base64Data}`
 }
 // Zustand store for thinking block state
 type ThinkingBlockState = {
  thinkingState: { [id: string]: boolean }
@ -58,6 +74,15 @@ const ThinkingBlock = ({
  const setThinkingState = useThinkingStore((state) => state.setThinkingState)
  const { t } = useTranslation()
  // Move useState for modal management to the top level of the component
  const [modalImage, setModalImage] = useState<{
    url: string
    alt: string
  } | null>(null)
  const closeModal = () => setModalImage(null)
  const handleImageClick = (url: string, alt: string) =>
    setModalImage({ url, alt })
  // Actual loading state comes from prop, determined by whether final text started streaming (Req 2)
  const loading = propLoading
@ -108,7 +133,12 @@ const ThinkingBlock = ({
  }
  // --- Rendering Functions for Expanded View ---
-  const renderStepContent = (step: ReActStep, index: number) => {
+  const renderStepContent = (
    step: ReActStep,
    index: number,
    handleImageClick: (url: string, alt: string) => void,
    t: (key: string) => string
  ) => {
    // Updated type
    if (step.type === 'done') {
      const timeInSeconds = formatDuration(step.time ?? 0)
@ -131,7 +161,14 @@ const ThinkingBlock = ({
      )
    }
-    let contentDisplay
+    const parsed = safeParseJSON(step.content)
    const mcpContent = parsed?.content ?? []
    const hasImages =
      Array.isArray(mcpContent) &&
      mcpContent.some((c) => c.type === 'image' && c.data && c.mimeType)
    let contentDisplay: React.ReactNode
    if (step.type === 'tool_call') {
      const args = step.metadata ? step.metadata : ''
      contentDisplay = (
@ -150,19 +187,52 @@ const ThinkingBlock = ({
        </>
      )
    } else if (step.type === 'tool_output') {
-      contentDisplay = (
+      if (hasImages) {
-        <>
+        // Display each image
-          <p className="font-medium text-main-view-fg/90">Tool Output:</p>
+        contentDisplay = (
-          <div className="mt-1">
+          <>
-            <RenderMarkdown
+            <p className="font-medium text-main-view-fg/90">
-              isWrapping={true}
+              Tool Output (Images):
-              content={step.content.substring(0, 1000)}
+            </p>
-            />
+            <div className="mt-2 space-y-2">
-          </div>
+              {mcpContent.map((item: any, index: number) =>
-        </>
+                item.type === 'image' && item.data && item.mimeType ? (
-      )
+                  <div key={index} className="my-2">
                    <img
                      src={createDataUrl(item.data, item.mimeType)}
                      alt={`MCP Image ${index + 1}`}
                      className="max-w-full max-h-64 object-contain rounded-md border border-main-view-fg/10 cursor-pointer hover:opacity-80 transition-opacity"
                      onError={(e) => (e.currentTarget.style.display = 'none')}
                      onClick={() =>
                        handleImageClick(
                          createDataUrl(item.data, item.mimeType),
                          `MCP Image ${index + 1}`
                        )
                      }
                    />
                  </div>
                ) : null
              )}
            </div>
          </>
        )
      } else {
        // Default behavior: wrap text in code block if no backticks
        let content = step.content.substring(0, 1000)
        if (!content.includes('```')) {
          content = '```json\n' + content + '\n```'
        }
        contentDisplay = (
          <>
            <p className="font-medium text-main-view-fg/90">Tool Output:</p>
            <div className="mt-1">
              <RenderMarkdown isWrapping={true} content={content} />
            </div>
          </>
        )
      }
    } else {
      // reasoning
      contentDisplay = (
        <RenderMarkdown isWrapping={true} content={step.content} />
      )
@ -175,7 +245,7 @@ const ThinkingBlock = ({
    )
  }
-  const headerTitle = useMemo(() => {
+  const headerTitle: string = useMemo(() => {
    // Check if any step was a tool call
    const hasToolCalls = steps.some((step) => step.type === 'tool_call')
    const hasReasoning = steps.some((step) => step.type === 'reasoning')
@ -255,7 +325,7 @@ const ThinkingBlock = ({
                  )}
                />
                {/* Active step content */}
-                {renderStepContent(activeStep, N - 1)}
+                {renderStepContent(activeStep, N - 1, handleImageClick, t)}
              </div>
            </div>
          </div>
@ -285,13 +355,15 @@ const ThinkingBlock = ({
                  />
                  {/* Step Content */}
-                  {renderStepContent(step, index)}
+                  {renderStepContent(step, index, handleImageClick, t)}
                </div>
              ))}
            </div>
          </div>
        )}
      </div>
      {/* Render ImageModal once at the top level */}
      <ImageModal image={modalImage} onClose={closeModal} />
    </div>
  )
 }