From 6e46988b03fbc6ecbda44f250e8fe6ac4bc2fd5e Mon Sep 17 00:00:00 2001
From: Akarshan <akarshan@menlo.ai>
Date: Tue, 21 Oct 2025 20:35:29 +0530
Subject: [PATCH] fix: final text stream rendering

---
 web-app/src/containers/ThinkingBlock.tsx |  47 ++--
 web-app/src/containers/ThreadContent.tsx | 287 ++++++++++++++++-------
 web-app/src/hooks/useChat.ts             |  38 ++-
 web-app/src/lib/completion.ts            |  60 ++++-
 4 files changed, 317 insertions(+), 115 deletions(-)
diff --git a/web-app/src/containers/ThinkingBlock.tsx b/web-app/src/containers/ThinkingBlock.tsx
index e09c8b2f0..d59b41415 100644
--- a/web-app/src/containers/ThinkingBlock.tsx
+++ b/web-app/src/containers/ThinkingBlock.tsx
@@ -7,9 +7,9 @@ import { useTranslation } from '@/i18n/react-i18next-compat'
 import { useMemo } from 'react'
 import { cn } from '@/lib/utils'
 
-// Define ThoughtStep type
-type ThoughtStep = {
-  type: 'thought' | 'tool_call' | 'tool_output' | 'done'
+// Define ReActStep type (Reasoning-Action Step)
+type ReActStep = {
+  type: 'reasoning' | 'tool_call' | 'tool_output' | 'done' // Changed 'thought' to 'reasoning'
   content: string
   metadata?: any
   time?: number
@@ -18,7 +18,7 @@ type ThoughtStep = {
 interface Props {
   text: string
   id: string
-  steps?: ThoughtStep[]
+  steps?: ReActStep[] // Updated type
   loading?: boolean
   duration?: number
 }
@@ -73,14 +73,10 @@ const ThinkingBlock = ({
   const N = stepsWithoutDone.length
 
   // Determine the step to display in the condensed streaming view
-  // When step N-1 is streaming, show the previously finished step (N-2).
-  const stepToRenderWhenStreaming = useMemo(() => {
-    if (!loading) return null
-    // If N >= 2, the N-1 step is currently streaming, so we show the finished step N-2.
-    if (N >= 2) {
-      return stepsWithoutDone[N - 2]
-    }
-    return null
+  // When loading, we show the last available step (N-1), which is currently accumulating content.
+  const activeStep = useMemo(() => {
+    if (!loading || N === 0) return null
+    return stepsWithoutDone[N - 1]
   }, [loading, N, stepsWithoutDone])
 
   // Determine if the block is truly empty (streaming started but no content/steps yet)
@@ -112,7 +108,8 @@ const ThinkingBlock = ({
   }
 
   // --- Rendering Functions for Expanded View ---
-  const renderStepContent = (step: ThoughtStep, index: number) => {
+  const renderStepContent = (step: ReActStep, index: number) => {
+    // 'done' steps are handled first: they show the formatted elapsed time
     if (step.type === 'done') {
       const timeInSeconds = formatDuration(step.time ?? 0)
       const timeDisplay =
@@ -165,7 +162,7 @@ const ThinkingBlock = ({
         </>
       )
     } else {
-      // thought
+      // reasoning
       contentDisplay = (
         <RenderMarkdown isWrapping={true} content={step.content} />
       )
@@ -216,21 +213,27 @@ const ThinkingBlock = ({
           </button>
         </div>
 
-        {/* Streaming/Condensed View - shows previous finished step */}
-        {loading && stepToRenderWhenStreaming && (
+        {/* Streaming/Condensed View - shows active step (N-1) */}
+        {loading && activeStep && (
           <div
-            key={`streaming-${N - 2}`}
+            key={`streaming-${N - 1}`}
             className={cn(
               'mt-4 pl-2 pr-4 text-main-view-fg/60',
-              'animate-in fade-in slide-in-from-top-2 duration-300'
+              // Only animate fade-in if it's not the very first step (N > 1)
+              N > 1 && 'animate-in fade-in slide-in-from-top-2 duration-300'
             )}
           >
             <div className="relative border-main-view-fg/20">
               <div className="relative pl-5">
-                {/* Bullet point */}
-                <div className="absolute left-[-2px] top-1.5 size-2 rounded-full bg-main-view-fg/60 animate-pulse" />
-                {/* Previous completed step content */}
-                {renderStepContent(stepToRenderWhenStreaming, N - 2)}
+                {/* Bullet point/Icon position relative to line */}
+                <div
+                  className={cn(
+                    'absolute left-[-2px] top-1.5 size-2 rounded-full bg-main-view-fg/60',
+                    activeStep.type !== 'done' && 'animate-pulse' // Pulse if active/streaming
+                  )}
+                />
+                {/* Active step content */}
+                {renderStepContent(activeStep, N - 1)}
               </div>
             </div>
           </div>
diff --git a/web-app/src/containers/ThreadContent.tsx b/web-app/src/containers/ThreadContent.tsx
index 4d4585076..047c86006 100644
--- a/web-app/src/containers/ThreadContent.tsx
+++ b/web-app/src/containers/ThreadContent.tsx
@@ -28,7 +28,6 @@ import { useTranslation } from '@/i18n/react-i18next-compat'
 import { useModelProvider } from '@/hooks/useModelProvider'
 import { extractFilesFromPrompt } from '@/lib/fileMetadata'
 import { createImageAttachment } from '@/types/attachment'
-import { extractThinkingContent } from '@/lib/utils'
 
 // Define ToolCall interface for type safety when accessing metadata
 interface ToolCall {
@@ -43,14 +42,21 @@ interface ToolCall {
   state?: 'pending' | 'completed'
 }
 
-// Define ThoughtStep type
-type ThoughtStep = {
-  type: 'thought' | 'tool_call' | 'tool_output' | 'done'
+// Define ReActStep type (Reasoning-Action Step)
+type ReActStep = {
+  type: 'reasoning' | 'tool_call' | 'tool_output' | 'done'
   content: string
   metadata?: any
   time?: number
 }
 
+// Strip the <think>…</think> wrapper from reasoning text. The input is trimmed first
+// because the ^/$ anchors only match a tag sitting at the very edge of the string.
+const cleanReasoning = (content: string) => {
+  const trimmed = content.trim()
+  return trimmed.replace(/^<think>/, '').replace(/<\/think>$/, '').trim()
+}
+
 const CopyButton = ({ text }: { text: string }) => {
   const [copied, setCopied] = useState(false)
   const { t } = useTranslation()
@@ -135,37 +141,38 @@ export const ThreadContent = memo(
     }, [text, item.role])
 
     const { reasoningSegment, textSegment } = useMemo(() => {
-      // Check for thinking formats
-      const hasThinkTag = text.includes('<think>') && !text.includes('</think>')
-      const hasAnalysisChannel =
-        text.includes('<|channel|>analysis<|message|>') &&
-        !text.includes('<|start|>assistant<|channel|>final<|message|>')
-
-      if (hasThinkTag || hasAnalysisChannel)
-        return { reasoningSegment: text, textSegment: '' }
+      let reasoningSegment = undefined
+      let textSegment = text
 
       // Check for completed think tag format
-      const thinkMatch = text.match(/<think>([\s\S]*?)<\/think>/)
-      if (thinkMatch?.index !== undefined) {
-        const splitIndex = thinkMatch.index + thinkMatch[0].length
-        return {
-          reasoningSegment: text.slice(0, splitIndex),
-          textSegment: text.slice(splitIndex),
-        }
-      }
-
-      // Check for completed analysis channel format
-      const analysisMatch = text.match(
-        /<\|channel\|>analysis<\|message\|>([\s\S]*?)<\|start\|>assistant<\|channel\|>final<\|message\|>/
-      )
-      if (analysisMatch?.index !== undefined) {
-        const splitIndex = analysisMatch.index + analysisMatch[0].length
-        return {
-          reasoningSegment: text.slice(0, splitIndex),
-          textSegment: text.slice(splitIndex),
-        }
+      // Reasoning is delimited by literal <think> / </think> markers in the text
+      const thinkStartTag = '<think>'
+      const thinkEndTag = '</think>'
+
+      const firstThinkIndex = text.indexOf(thinkStartTag)
+      const lastThinkEndIndex = text.lastIndexOf(thinkEndTag)
+
+      if (firstThinkIndex !== -1 && lastThinkEndIndex > firstThinkIndex) {
+        // If multiple <think>...</think> blocks exist sequentially, we capture the entire span
+        // from the start of the first tag to the end of the last tag.
+        const splitIndex = lastThinkEndIndex + thinkEndTag.length
+
+        reasoningSegment = text.slice(firstThinkIndex, splitIndex)
+        textSegment = text.slice(splitIndex).trim()
+
+        return { reasoningSegment, textSegment }
+      }
+      // If streaming, and we see the opening tag, the entire message is reasoningSegment
+      const hasThinkTagStart =
+        text.includes(thinkStartTag) && !text.includes(thinkEndTag)
+
+      if (hasThinkTagStart) {
+        reasoningSegment = text
+        textSegment = ''
+        return { reasoningSegment, textSegment }
       }
 
+      // Default: No reasoning found, or it's a message composed entirely of final text.
       return { reasoningSegment: undefined, textSegment: text }
     }, [text])
 
@@ -252,79 +259,194 @@ export const ThreadContent = memo(
       | { avatar?: React.ReactNode; name?: React.ReactNode }
       | undefined
 
-    // Constructing allSteps for ThinkingBlock (Fixing Interleaving and Done step)
-    const allSteps: ThoughtStep[] = useMemo(() => {
-      const steps: ThoughtStep[] = []
+    type StreamEvent = {
+      timestamp: number
+      type: 'reasoning_chunk' | 'tool_call' | 'tool_output'
+      data: any // per type: { content } | { toolCall } | { result } (see the switch on event.type)
+    }
 
-      // Extract thought paragraphs from reasoningSegment. We assume these are ordered
-      // relative to tool calls.
-      const thoughtText = extractThinkingContent(reasoningSegment || '')
-      const thoughtParagraphs = thoughtText
-        ? thoughtText
+    // Constructing allSteps for ThinkingBlock - CHRONOLOGICAL approach
+    const allSteps: ReActStep[] = useMemo(() => {
+      const steps: ReActStep[] = []
+
+      // Get streamEvents from metadata (if available)
+      const streamEvents = (item.metadata?.streamEvents as StreamEvent[]) || []
+      const toolCalls = (item.metadata?.tool_calls || []) as ToolCall[]
+
+      if (streamEvents.length > 0) {
+        // CHRONOLOGICAL PATH: Use streamEvents for true temporal order
+        let reasoningBuffer = ''
+
+        streamEvents.forEach((event) => {
+          switch (event.type) {
+            case 'reasoning_chunk':
+              // Accumulate reasoning chunks
+              reasoningBuffer += event.data.content
+              break
+
+            case 'tool_call':
+            case 'tool_output':
+              // Flush accumulated reasoning before tool event
+              if (reasoningBuffer.trim()) {
+                const cleanedBuffer = cleanReasoning(reasoningBuffer) // <--- Strip tags here
+
+                // Split accumulated reasoning by paragraphs for display
+                const paragraphs = cleanedBuffer
+                  .split(/\n\s*\n/)
+                  .filter((p) => p.trim().length > 0)
+
+                paragraphs.forEach((para) => {
+                  steps.push({
+                    type: 'reasoning',
+                    content: para.trim(),
+                  })
+                })
+
+                reasoningBuffer = ''
+              }
+
+              if (event.type === 'tool_call') {
+                // Add tool call
+                const toolCall = event.data.toolCall
+                steps.push({
+                  type: 'tool_call',
+                  content: toolCall?.function?.name || 'Tool Call',
+                  metadata:
+                    typeof toolCall?.function?.arguments === 'string'
+                      ? toolCall.function.arguments
+                      : JSON.stringify(
+                          toolCall?.function?.arguments || {},
+                          null,
+                          2
+                        ),
+                })
+              } else if (event.type === 'tool_output') {
+                // Add tool output
+                const result = event.data.result
+                let outputContent = JSON.stringify(result, null, 2) // Default fallback
+
+                const firstContentPart = result?.content?.[0]
+
+                if (firstContentPart?.type === 'text') {
+                  const textContent = firstContentPart.text
+                  // Robustly check for { value: string } structure or direct string
+                  if (
+                    typeof textContent === 'object' &&
+                    textContent !== null &&
+                    'value' in textContent
+                  ) {
+                    outputContent = textContent.value as string
+                  } else if (typeof textContent === 'string') {
+                    outputContent = textContent
+                  }
+                } else if (typeof result === 'string') {
+                  outputContent = result
+                }
+
+                steps.push({
+                  type: 'tool_output',
+                  content: outputContent,
+                })
+              }
+              break
+          }
+        })
+
+        // Flush any remaining reasoning at the end
+        if (reasoningBuffer.trim()) {
+          const cleanedBuffer = cleanReasoning(reasoningBuffer) // <--- Strip tags here
+          const paragraphs = cleanedBuffer
             .split(/\n\s*\n/)
-            .filter((s) => s.trim().length > 0)
-            .map((content) => content.trim())
-        : []
+            .filter((p) => p.trim().length > 0)
 
-      let thoughtIndex = 0
-
-      // Interleave tool steps and thought steps
-      if (isToolCalls && item.metadata?.tool_calls) {
-        const toolCalls = item.metadata.tool_calls as ToolCall[]
-
-        for (const call of toolCalls) {
-          // Check for thought chunk preceding this tool call
-          if (thoughtIndex < thoughtParagraphs.length) {
+          paragraphs.forEach((para) => {
             steps.push({
-              type: 'thought',
-              content: thoughtParagraphs[thoughtIndex],
+              type: 'reasoning',
+              content: para.trim(),
             })
-            thoughtIndex++
+          })
+        }
+      } else {
+        // FALLBACK PATH: metadata has no streamEvents, so the true order is unknown -
+        // use old paragraph-splitting logic, pairing one paragraph with each tool call
+        const rawReasoningContent = cleanReasoning(reasoningSegment || '')
+        const reasoningParagraphs = rawReasoningContent
+          ? rawReasoningContent
+              .split(/\n\s*\n/)
+              .filter((s) => s.trim().length > 0)
+              .map((content) => content.trim())
+          : []
+
+        let reasoningIndex = 0
+
+        toolCalls.forEach((call) => {
+          // Add reasoning before this tool call
+          if (reasoningIndex < reasoningParagraphs.length) {
+            steps.push({
+              type: 'reasoning',
+              content: reasoningParagraphs[reasoningIndex],
+            })
+            reasoningIndex++
           }
 
-          // Tool Call Step
+          // Add tool call
           steps.push({
             type: 'tool_call',
             content: call.tool?.function?.name || 'Tool Call',
-            metadata: call.tool?.function?.arguments as string,
+            metadata:
+              typeof call.tool?.function?.arguments === 'string'
+                ? call.tool.function.arguments
+                : JSON.stringify(call.tool?.function?.arguments || {}, null, 2),
           })
 
-          // Tool Output Step
+          // Add tool output
           if (call.response) {
-            const outputContent =
-              typeof call.response === 'string'
-                ? call.response
-                : JSON.stringify(call.response, null, 2)
+            const result = call.response
+            let outputContent = JSON.stringify(result, null, 2)
+
+            const firstContentPart = result?.content?.[0]
+
+            if (firstContentPart?.type === 'text') {
+              const textContent = firstContentPart.text
+              if (
+                typeof textContent === 'object' &&
+                textContent !== null &&
+                'value' in textContent
+              ) {
+                outputContent = textContent.value as string
+              } else if (typeof textContent === 'string') {
+                outputContent = textContent
+              }
+            } else if (typeof result === 'string') {
+              outputContent = result
+            }
 
             steps.push({
               type: 'tool_output',
               content: outputContent,
             })
           }
+        })
+
+        // Add remaining reasoning
+        while (reasoningIndex < reasoningParagraphs.length) {
+          steps.push({
+            type: 'reasoning',
+            content: reasoningParagraphs[reasoningIndex],
+          })
+          reasoningIndex++
         }
       }
 
-      // Add remaining thoughts (e.g., final answer formulation thought)
-      while (thoughtIndex < thoughtParagraphs.length) {
-        steps.push({
-          type: 'thought',
-          content: thoughtParagraphs[thoughtIndex],
-        })
-        thoughtIndex++
-      }
-
-      // Add Done step only if the sequence is concluded for display
+      // Add Done step
       const totalTime = item.metadata?.totalThinkingTime as number | undefined
       const lastStepType = steps[steps.length - 1]?.type
 
-      // If the message is finalized (not streaming) AND the last step was a tool output
-      // AND there is no subsequent final text, we suppress 'done' to allow seamless transition
-      // to the next assistant message/thought block.
-      const endsInToolOutputWithoutFinalText =
-        lastStepType === 'tool_output' && textSegment.length === 0
-
       if (!isStreamingThisThread && (hasReasoning || isToolCalls)) {
-        if (textSegment.length > 0 || !endsInToolOutputWithoutFinalText) {
+        const endsInToolOutputWithoutFinalText =
+          lastStepType === 'tool_output' && textSegment.length === 0
+
+        if (!endsInToolOutputWithoutFinalText) {
           steps.push({
             type: 'done',
             content: 'Done',
@@ -335,11 +457,11 @@ export const ThreadContent = memo(
 
       return steps
     }, [
+      item,
       reasoningSegment,
-      isToolCalls,
-      item.metadata,
       isStreamingThisThread,
       hasReasoning,
+      isToolCalls,
       textSegment,
     ])
     // END: Constructing allSteps
@@ -504,10 +626,7 @@ export const ThreadContent = memo(
               />
             )}
 
-            <RenderMarkdown
-              content={textSegment.replace('</think>', '')}
-              components={linkComponents}
-            />
+            <RenderMarkdown content={textSegment} components={linkComponents} />
 
             {!isToolCalls && (
               <div className="flex items-center gap-2 text-main-view-fg/60 text-xs">
diff --git a/web-app/src/hooks/useChat.ts b/web-app/src/hooks/useChat.ts
index 60541a1a1..80d1a5914 100644
--- a/web-app/src/hooks/useChat.ts
+++ b/web-app/src/hooks/useChat.ts
@@ -41,6 +41,12 @@ import { TEMPORARY_CHAT_QUERY_ID, TEMPORARY_CHAT_ID } from '@/constants/chat'
 import { toast } from 'sonner'
 import { Attachment } from '@/types/attachment'
 
+type StreamEvent = {
+  timestamp: number // Date.now() captured when the event is pushed
+  type: 'reasoning_chunk' | 'tool_call' | 'tool_output'
+  data: any // { content } for reasoning chunks, { toolCall } for tool calls
+}
+
 export const useChat = () => {
   const [
     updateTokenSpeed,
@@ -279,6 +285,8 @@ export const useChat = () => {
       const selectedProvider = useModelProvider.getState().selectedProvider
       let activeProvider = getProviderByName(selectedProvider)
 
+      const streamEvents: StreamEvent[] = []
+
       resetTokenSpeed()
       if (!activeThread || !activeProvider) return
 
@@ -555,6 +563,7 @@ export const useChat = () => {
                       ...e,
                       state: 'pending',
                     })),
+                    streamEvents: streamEvents,
                   }
                 )
                 updateStreamingContent(currentContent)
@@ -591,6 +600,7 @@ export const useChat = () => {
                     ...e,
                     state: 'pending',
                   })),
+                  streamEvents: streamEvents,
                 }
               )
               updateStreamingContent(currentContent)
@@ -636,16 +646,37 @@ export const useChat = () => {
                 if ('usage' in part && part.usage) {
                   tokenUsage = part.usage
                 }
+                const deltaToolCalls = part.choices[0]?.delta?.tool_calls
+                if (deltaToolCalls) {
+                  const index = deltaToolCalls[0]?.index
+                  // Check if this chunk starts a brand new tool call
+                  const isNewToolCallStart =
+                    index !== undefined && toolCalls[index] === undefined
 
-                if (part.choices[0]?.delta?.tool_calls) {
                   extractToolCall(part, currentCall, toolCalls)
-                  // Schedule a flush to reflect tool update
-                  scheduleFlush()
+
+                  if (isNewToolCallStart) {
+                    // Track tool call event only when it begins
+                    // toolCalls[index] is the newly created object due to extractToolCall
+                    streamEvents.push({
+                      timestamp: Date.now(),
+                      type: 'tool_call',
+                      data: { toolCall: toolCalls[index] },
+                    })
+                    // Schedule a flush to reflect tool update
+                    scheduleFlush()
+                  }
                 }
                 const deltaReasoning =
                   reasoningProcessor.processReasoningChunk(part)
                 if (deltaReasoning) {
                   accumulatedText += deltaReasoning
+                  // Track reasoning event
+                  streamEvents.push({
+                    timestamp: Date.now(),
+                    type: 'reasoning_chunk',
+                    data: { content: deltaReasoning },
+                  })
                   pendingDeltaCount += 1
                   // Schedule flush for reasoning updates
                   scheduleFlush()
@@ -728,6 +759,7 @@ export const useChat = () => {
         const messageMetadata: Record<string, any> = {
           tokenSpeed: useAppState.getState().tokenSpeed,
           assistant: currentAssistant,
+          streamEvents, // Add chronological events
         }
 
         if (accumulatedText.includes('<think>') || toolCalls.length > 0) {
diff --git a/web-app/src/lib/completion.ts b/web-app/src/lib/completion.ts
index 14f4ff148..3b71333c0 100644
--- a/web-app/src/lib/completion.ts
+++ b/web-app/src/lib/completion.ts
@@ -687,6 +687,16 @@ export const postMessageProcessing = async (
       toolCallEntry.response = result
       toolCallEntry.state = 'ready'
       if (updateStreamingUI) updateStreamingUI({ ...message }) // Show result
+      const streamEvents = (message.metadata?.streamEvents || []) as any[]
+      streamEvents.push({
+        timestamp: Date.now(),
+        type: 'tool_output',
+        data: { result: result },
+      })
+      message.metadata = {
+        ...(message.metadata ?? {}),
+        streamEvents: streamEvents,
+      }
       builder.addToolMessage(result as ToolResult, toolCall.id)
 
       // Proactive mode: Capture screenshot/snapshot after browser tool execution
@@ -734,6 +744,7 @@ export const postMessageProcessing = async (
         if (followUpCompletion) {
           let followUpText = ''
           const newToolCalls: ChatCompletionMessageToolCall[] = []
+          const streamEvents = (message.metadata?.streamEvents || []) as any[]
           const textContent = message.content.find(
             (c) => c.type === ContentType.Text
           )
@@ -758,19 +769,56 @@ export const postMessageProcessing = async (
 
               if (textContent?.text) {
                 if (deltaReasoning) textContent.text.value += deltaReasoning
-                if (deltaContent) textContent.text.value += deltaContent
+                if (deltaContent) {
+                  // Visible answer text: append to the message and to followUpText
+                  textContent.text.value += deltaContent
+                  followUpText += deltaContent
+                }
               }
-              if (deltaContent) followUpText += deltaContent
+              if (deltaReasoning) {
+                streamEvents.push({
+                  timestamp: Date.now(),
+                  type: 'reasoning_chunk',
+                  data: { content: deltaReasoning },
+                })
+              }
+              const initialToolCallCount = newToolCalls.length
 
               if (chunk.choices[0]?.delta?.tool_calls) {
                 extractToolCall(chunk, null, newToolCalls)
+                if (newToolCalls.length > initialToolCallCount) {
+                  // The new tool call is the last element added
+                  streamEvents.push({
+                    timestamp: Date.now(),
+                    type: 'tool_call',
+                    data: { toolCall: newToolCalls[newToolCalls.length - 1] },
+                  })
+                }
+              }
+              // Ensure the metadata is updated before calling updateStreamingUI
+              message.metadata = {
+                ...(message.metadata ?? {}),
+                streamEvents: streamEvents,
               }
 
-              if (updateStreamingUI) updateStreamingUI({ ...message })
+              if (updateStreamingUI) {
+                // FIX: Create a new object reference for the content array
+                // This forces the memoized component to detect the change in the mutated text
+                const uiMessage: ThreadMessage = {
+                  ...message,
+                  content: message.content.map((c) => ({ ...c })), // Shallow copy array and its parts
+                }
+                updateStreamingUI(uiMessage)
+              }
             }
-            if (textContent?.text) {
-              textContent.text.value += reasoningProcessor.finalize()
-              if (updateStreamingUI) updateStreamingUI({ ...message })
+            if (textContent?.text && updateStreamingUI) {
+              // FIX: Create a new object reference for the content array
+              // This forces the memoized component to detect the change in the mutated text
+              const uiMessage: ThreadMessage = {
+                ...message,
+                content: message.content.map((c) => ({ ...c })), // Shallow copy array and its parts
+              }
+              updateStreamingUI(uiMessage)
             }
           }