fix: final text stream rendering

Akarshan 2025-10-21 20:35:29 +05:30
parent 9699b4805c
commit 6e46988b03
4 changed files with 317 additions and 115 deletions

View File

@@ -7,9 +7,9 @@ import { useTranslation } from '@/i18n/react-i18next-compat'
import { useMemo } from 'react'
import { cn } from '@/lib/utils'
// Define ThoughtStep type
type ThoughtStep = {
type: 'thought' | 'tool_call' | 'tool_output' | 'done'
// Define ReActStep type (Reasoning-Action Step)
type ReActStep = {
type: 'reasoning' | 'tool_call' | 'tool_output' | 'done' // Changed 'thought' to 'reasoning'
content: string
metadata?: any
time?: number
@@ -18,7 +18,7 @@ type ThoughtStep = {
interface Props {
text: string
id: string
steps?: ThoughtStep[]
steps?: ReActStep[] // Updated type
loading?: boolean
duration?: number
}
@@ -73,14 +73,10 @@ const ThinkingBlock = ({
const N = stepsWithoutDone.length
// Determine the step to display in the condensed streaming view
// When step N-1 is streaming, show the previously finished step (N-2).
const stepToRenderWhenStreaming = useMemo(() => {
if (!loading) return null
// If N >= 2, the N-1 step is currently streaming, so we show the finished step N-2.
if (N >= 2) {
return stepsWithoutDone[N - 2]
}
return null
// When loading, we show the last available step (N-1), which is currently accumulating content.
const activeStep = useMemo(() => {
if (!loading || N === 0) return null
return stepsWithoutDone[N - 1]
}, [loading, N, stepsWithoutDone])
// Determine if the block is truly empty (streaming started but no content/steps yet)
@@ -112,7 +108,8 @@ const ThinkingBlock = ({
}
// --- Rendering Functions for Expanded View ---
const renderStepContent = (step: ThoughtStep, index: number) => {
const renderStepContent = (step: ReActStep, index: number) => {
// Updated type
if (step.type === 'done') {
const timeInSeconds = formatDuration(step.time ?? 0)
const timeDisplay =
@@ -165,7 +162,7 @@ const ThinkingBlock = ({
</>
)
} else {
// thought
// reasoning
contentDisplay = (
<RenderMarkdown isWrapping={true} content={step.content} />
)
@@ -216,21 +213,27 @@ const ThinkingBlock = ({
</button>
</div>
{/* Streaming/Condensed View - shows previous finished step */}
{loading && stepToRenderWhenStreaming && (
{/* Streaming/Condensed View - shows active step (N-1) */}
{loading && activeStep && (
<div
key={`streaming-${N - 2}`}
key={`streaming-${N - 1}`}
className={cn(
'mt-4 pl-2 pr-4 text-main-view-fg/60',
'animate-in fade-in slide-in-from-top-2 duration-300'
// Only animate fade-in if it's not the very first step (N > 1)
N > 1 && 'animate-in fade-in slide-in-from-top-2 duration-300'
)}
>
<div className="relative border-main-view-fg/20">
<div className="relative pl-5">
{/* Bullet point */}
<div className="absolute left-[-2px] top-1.5 size-2 rounded-full bg-main-view-fg/60 animate-pulse" />
{/* Previous completed step content */}
{renderStepContent(stepToRenderWhenStreaming, N - 2)}
{/* Bullet point / icon, positioned relative to the step line */}
<div
className={cn(
'absolute left-[-2px] top-1.5 size-2 rounded-full bg-main-view-fg/60',
activeStep.type !== 'done' && 'animate-pulse' // Pulse if active/streaming
)}
/>
{/* Active step content */}
{renderStepContent(activeStep, N - 1)}
</div>
</div>
</div>
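
A minimal sketch of the behavioral change in the condensed view (the ReActStep shape comes from this diff; both helpers are illustrative, not part of the commit):

type ReActStep = {
  type: 'reasoning' | 'tool_call' | 'tool_output' | 'done'
  content: string
}

// Before: while streaming, the condensed view rendered the previously finished
// step (N - 2), hiding the step that was actually accumulating content.
const previousFinishedStep = (steps: ReActStep[], loading: boolean): ReActStep | null =>
  loading && steps.length >= 2 ? steps[steps.length - 2] : null

// After: the condensed view renders the last available step (N - 1) as it streams.
const activeStep = (steps: ReActStep[], loading: boolean): ReActStep | null =>
  loading && steps.length > 0 ? steps[steps.length - 1] : null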

View File

@@ -28,7 +28,6 @@ import { useTranslation } from '@/i18n/react-i18next-compat'
import { useModelProvider } from '@/hooks/useModelProvider'
import { extractFilesFromPrompt } from '@/lib/fileMetadata'
import { createImageAttachment } from '@/types/attachment'
import { extractThinkingContent } from '@/lib/utils'
// Define ToolCall interface for type safety when accessing metadata
interface ToolCall {
@@ -43,14 +42,21 @@ interface ToolCall {
state?: 'pending' | 'completed'
}
// Define ThoughtStep type
type ThoughtStep = {
type: 'thought' | 'tool_call' | 'tool_output' | 'done'
// Define ReActStep type (Reasoning-Action Step)
type ReActStep = {
type: 'reasoning' | 'tool_call' | 'tool_output' | 'done'
content: string
metadata?: any
time?: number
}
const cleanReasoning = (content: string) => {
return content
.replace(/^<think>/, '') // Remove opening tag at start
.replace(/<\/think>$/, '') // Remove closing tag at end
.trim()
}
const CopyButton = ({ text }: { text: string }) => {
const [copied, setCopied] = useState(false)
const { t } = useTranslation()
@@ -135,37 +141,38 @@ export const ThreadContent = memo(
}, [text, item.role])
const { reasoningSegment, textSegment } = useMemo(() => {
// Check for thinking formats
const hasThinkTag = text.includes('<think>') && !text.includes('</think>')
const hasAnalysisChannel =
text.includes('<|channel|>analysis<|message|>') &&
!text.includes('<|start|>assistant<|channel|>final<|message|>')
if (hasThinkTag || hasAnalysisChannel)
return { reasoningSegment: text, textSegment: '' }
let reasoningSegment = undefined
let textSegment = text
// Check for completed think tag format
const thinkMatch = text.match(/<think>([\s\S]*?)<\/think>/)
if (thinkMatch?.index !== undefined) {
const splitIndex = thinkMatch.index + thinkMatch[0].length
return {
reasoningSegment: text.slice(0, splitIndex),
textSegment: text.slice(splitIndex),
}
}
// Check for completed analysis channel format
const analysisMatch = text.match(
/<\|channel\|>analysis<\|message\|>([\s\S]*?)<\|start\|>assistant<\|channel\|>final<\|message\|>/
)
if (analysisMatch?.index !== undefined) {
const splitIndex = analysisMatch.index + analysisMatch[0].length
return {
reasoningSegment: text.slice(0, splitIndex),
textSegment: text.slice(splitIndex),
const thinkStartTag = '<think>'
const thinkEndTag = '</think>'
const firstThinkIndex = text.indexOf(thinkStartTag)
const lastThinkEndIndex = text.lastIndexOf(thinkEndTag)
if (firstThinkIndex !== -1 && lastThinkEndIndex > firstThinkIndex) {
// If multiple <think>...</think> blocks exist sequentially, we capture the entire span
// from the start of the first tag to the end of the last tag.
const splitIndex = lastThinkEndIndex + thinkEndTag.length
reasoningSegment = text.slice(firstThinkIndex, splitIndex)
textSegment = text.slice(splitIndex).trim()
return { reasoningSegment, textSegment }
}
// While streaming, an opening tag with no closing tag yet means the entire message is still reasoning
const hasThinkTagStart =
text.includes(thinkStartTag) && !text.includes(thinkEndTag)
if (hasThinkTagStart) {
reasoningSegment = text
textSegment = ''
return { reasoningSegment, textSegment }
}
// Default: No reasoning found, or it's a message composed entirely of final text.
return { reasoningSegment: undefined, textSegment: text }
}, [text])
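
Condensed, the new segmentation rule: everything from the first <think> to the last </think> is reasoning, the remainder is final text, and an opening tag with no closing tag yet (mid-stream) marks the whole message as reasoning. A standalone sketch of that rule (splitThink is a hypothetical name):

const splitThink = (text: string): { reasoningSegment?: string; textSegment: string } => {
  const start = text.indexOf('<think>')
  const end = text.lastIndexOf('</think>')
  if (start !== -1 && end > start) {
    // Capture the span from the first opening tag through the last closing tag.
    const split = end + '</think>'.length
    return { reasoningSegment: text.slice(start, split), textSegment: text.slice(split).trim() }
  }
  // Still streaming: opening tag seen, no closing tag yet.
  if (start !== -1 && end === -1) return { reasoningSegment: text, textSegment: '' }
  return { textSegment: text }
}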
@@ -252,79 +259,194 @@ export const ThreadContent = memo(
| { avatar?: React.ReactNode; name?: React.ReactNode }
| undefined
// Constructing allSteps for ThinkingBlock (Fixing Interleaving and Done step)
const allSteps: ThoughtStep[] = useMemo(() => {
const steps: ThoughtStep[] = []
// Extract thought paragraphs from reasoningSegment. We assume these are ordered
// relative to tool calls.
const thoughtText = extractThinkingContent(reasoningSegment || '')
const thoughtParagraphs = thoughtText
? thoughtText
.split(/\n\s*\n/)
.filter((s) => s.trim().length > 0)
.map((content) => content.trim())
: []
let thoughtIndex = 0
// Interleave tool steps and thought steps
if (isToolCalls && item.metadata?.tool_calls) {
const toolCalls = item.metadata.tool_calls as ToolCall[]
for (const call of toolCalls) {
// Check for thought chunk preceding this tool call
if (thoughtIndex < thoughtParagraphs.length) {
steps.push({
type: 'thought',
content: thoughtParagraphs[thoughtIndex],
})
thoughtIndex++
type StreamEvent = {
timestamp: number
type: 'reasoning_chunk' | 'tool_call' | 'tool_output'
data: any
}
// Tool Call Step
// Constructing allSteps for ThinkingBlock - CHRONOLOGICAL approach
const allSteps: ReActStep[] = useMemo(() => {
const steps: ReActStep[] = []
// Get streamEvents from metadata (if available)
const streamEvents = (item.metadata?.streamEvents as StreamEvent[]) || []
const toolCalls = (item.metadata?.tool_calls || []) as ToolCall[]
if (streamEvents.length > 0) {
// CHRONOLOGICAL PATH: Use streamEvents for true temporal order
let reasoningBuffer = ''
streamEvents.forEach((event) => {
switch (event.type) {
case 'reasoning_chunk':
// Accumulate reasoning chunks
reasoningBuffer += event.data.content
break
case 'tool_call':
case 'tool_output':
// Flush accumulated reasoning before tool event
if (reasoningBuffer.trim()) {
const cleanedBuffer = cleanReasoning(reasoningBuffer) // strip <think> tags before splitting
// Split accumulated reasoning by paragraphs for display
const paragraphs = cleanedBuffer
.split(/\n\s*\n/)
.filter((p) => p.trim().length > 0)
paragraphs.forEach((para) => {
steps.push({
type: 'tool_call',
content: call.tool?.function?.name || 'Tool Call',
metadata: call.tool?.function?.arguments as string,
type: 'reasoning',
content: para.trim(),
})
})
// Tool Output Step
if (call.response) {
const outputContent =
typeof call.response === 'string'
? call.response
: JSON.stringify(call.response, null, 2)
reasoningBuffer = ''
}
if (event.type === 'tool_call') {
// Add tool call
const toolCall = event.data.toolCall
steps.push({
type: 'tool_call',
content: toolCall?.function?.name || 'Tool Call',
metadata:
typeof toolCall?.function?.arguments === 'string'
? toolCall.function.arguments
: JSON.stringify(
toolCall?.function?.arguments || {},
null,
2
),
})
} else if (event.type === 'tool_output') {
// Add tool output
const result = event.data.result
let outputContent = JSON.stringify(result, null, 2) // Default fallback
const firstContentPart = result?.content?.[0]
if (firstContentPart?.type === 'text') {
const textContent = firstContentPart.text
// Robustly check for { value: string } structure or direct string
if (
typeof textContent === 'object' &&
textContent !== null &&
'value' in textContent
) {
outputContent = textContent.value as string
} else if (typeof textContent === 'string') {
outputContent = textContent
}
} else if (typeof result === 'string') {
outputContent = result
}
steps.push({
type: 'tool_output',
content: outputContent,
})
}
break
}
}
// Add remaining thoughts (e.g., final answer formulation thought)
while (thoughtIndex < thoughtParagraphs.length) {
steps.push({
type: 'thought',
content: thoughtParagraphs[thoughtIndex],
})
thoughtIndex++
// Flush any remaining reasoning at the end
if (reasoningBuffer.trim()) {
const cleanedBuffer = cleanReasoning(reasoningBuffer) // strip <think> tags before splitting
const paragraphs = cleanedBuffer
.split(/\n\s*\n/)
.filter((p) => p.trim().length > 0)
paragraphs.forEach((para) => {
steps.push({
type: 'reasoning',
content: para.trim(),
})
})
}
} else {
console.debug('No streamEvents metadata; falling back to paragraph interleaving')
// FALLBACK PATH: No streamEvents - use old paragraph-splitting logic
const rawReasoningContent = cleanReasoning(reasoningSegment || '')
const reasoningParagraphs = rawReasoningContent
? rawReasoningContent
.split(/\n\s*\n/)
.filter((s) => s.trim().length > 0)
.map((content) => content.trim())
: []
let reasoningIndex = 0
toolCalls.forEach((call) => {
// Add reasoning before this tool call
if (reasoningIndex < reasoningParagraphs.length) {
steps.push({
type: 'reasoning',
content: reasoningParagraphs[reasoningIndex],
})
reasoningIndex++
}
// Add Done step only if the sequence is concluded for display
// Add tool call
steps.push({
type: 'tool_call',
content: call.tool?.function?.name || 'Tool Call',
metadata:
typeof call.tool?.function?.arguments === 'string'
? call.tool.function.arguments
: JSON.stringify(call.tool?.function?.arguments || {}, null, 2),
})
// Add tool output
if (call.response) {
const result = call.response
let outputContent = JSON.stringify(result, null, 2)
const firstContentPart = result?.content?.[0]
if (firstContentPart?.type === 'text') {
const textContent = firstContentPart.text
if (
typeof textContent === 'object' &&
textContent !== null &&
'value' in textContent
) {
outputContent = textContent.value as string
} else if (typeof textContent === 'string') {
outputContent = textContent
}
} else if (typeof result === 'string') {
outputContent = result
}
steps.push({
type: 'tool_output',
content: outputContent,
})
}
})
// Add remaining reasoning
while (reasoningIndex < reasoningParagraphs.length) {
steps.push({
type: 'reasoning',
content: reasoningParagraphs[reasoningIndex],
})
reasoningIndex++
}
}
// Add Done step
const totalTime = item.metadata?.totalThinkingTime as number | undefined
const lastStepType = steps[steps.length - 1]?.type
// If the message is finalized (not streaming) AND the last step was a tool output
// AND there is no subsequent final text, we suppress 'done' to allow seamless transition
// to the next assistant message/thought block.
if (!isStreamingThisThread && (hasReasoning || isToolCalls)) {
const endsInToolOutputWithoutFinalText =
lastStepType === 'tool_output' && textSegment.length === 0
if (!isStreamingThisThread && (hasReasoning || isToolCalls)) {
if (textSegment.length > 0 || !endsInToolOutputWithoutFinalText) {
if (!endsInToolOutputWithoutFinalText) {
steps.push({
type: 'done',
content: 'Done',
@@ -335,11 +457,11 @@ export const ThreadContent = memo(
return steps
}, [
item,
reasoningSegment,
isToolCalls,
item.metadata,
isStreamingThisThread,
hasReasoning,
isToolCalls,
textSegment,
])
// END: Constructing allSteps
@@ -504,10 +626,7 @@ export const ThreadContent = memo(
/>
)}
<RenderMarkdown
content={textSegment.replace('</think>', '')}
components={linkComponents}
/>
<RenderMarkdown content={textSegment} components={linkComponents} />
{!isToolCalls && (
<div className="flex items-center gap-2 text-main-view-fg/60 text-xs">
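
Taken together, the chronological path is a fold over streamEvents: buffer reasoning chunks, flush them as paragraph steps whenever a tool event arrives, and flush once more at the end. A minimal sketch assuming the StreamEvent/ReActStep shapes and the cleanReasoning helper above (foldEvents and its simplified tool-step contents are illustrative):

const foldEvents = (events: StreamEvent[]): ReActStep[] => {
  const steps: ReActStep[] = []
  let reasoningBuffer = ''
  const flushReasoning = () => {
    for (const para of cleanReasoning(reasoningBuffer).split(/\n\s*\n/)) {
      if (para.trim()) steps.push({ type: 'reasoning', content: para.trim() })
    }
    reasoningBuffer = ''
  }
  for (const event of events) {
    if (event.type === 'reasoning_chunk') {
      reasoningBuffer += event.data.content
    } else {
      flushReasoning() // reasoning keeps its position relative to tool use
      steps.push({ type: event.type, content: JSON.stringify(event.data, null, 2) })
    }
  }
  flushReasoning()
  return steps
}

// Hypothetical event order for one ReAct turn; the fold preserves it as
// [reasoning, tool_call, tool_output, reasoning] rather than grouping all
// reasoning ahead of the tool steps, which was the fallback's failure mode.
const exampleEvents: StreamEvent[] = [
  { timestamp: 1, type: 'reasoning_chunk', data: { content: 'I should check the weather.' } },
  { timestamp: 2, type: 'tool_call', data: { toolCall: { function: { name: 'get_weather' } } } },
  { timestamp: 3, type: 'tool_output', data: { result: 'sunny, 31°C' } },
  { timestamp: 4, type: 'reasoning_chunk', data: { content: 'Now I can answer.' } },
]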

View File

@@ -41,6 +41,12 @@ import { TEMPORARY_CHAT_QUERY_ID, TEMPORARY_CHAT_ID } from '@/constants/chat'
import { toast } from 'sonner'
import { Attachment } from '@/types/attachment'
type StreamEvent = {
timestamp: number
type: 'reasoning_chunk' | 'tool_call' | 'tool_output'
data: any
}
export const useChat = () => {
const [
updateTokenSpeed,
@@ -279,6 +285,8 @@ export const useChat = () => {
const selectedProvider = useModelProvider.getState().selectedProvider
let activeProvider = getProviderByName(selectedProvider)
const streamEvents: StreamEvent[] = []
resetTokenSpeed()
if (!activeThread || !activeProvider) return
@@ -555,6 +563,7 @@ export const useChat = () => {
...e,
state: 'pending',
})),
streamEvents: streamEvents,
}
)
updateStreamingContent(currentContent)
@@ -591,6 +600,7 @@ export const useChat = () => {
...e,
state: 'pending',
})),
streamEvents: streamEvents,
}
)
updateStreamingContent(currentContent)
@@ -636,16 +646,37 @@ export const useChat = () => {
if ('usage' in part && part.usage) {
tokenUsage = part.usage
}
const deltaToolCalls = part.choices[0]?.delta?.tool_calls
if (deltaToolCalls) {
const index = deltaToolCalls[0]?.index
// Check if this chunk starts a brand new tool call
const isNewToolCallStart =
index !== undefined && toolCalls[index] === undefined
if (part.choices[0]?.delta?.tool_calls) {
extractToolCall(part, currentCall, toolCalls)
if (isNewToolCallStart) {
// Track tool call event only when it begins
// toolCalls[index] is the object newly created by extractToolCall
streamEvents.push({
timestamp: Date.now(),
type: 'tool_call',
data: { toolCall: toolCalls[index] },
})
// Schedule a flush to reflect tool update
scheduleFlush()
}
}
const deltaReasoning =
reasoningProcessor.processReasoningChunk(part)
if (deltaReasoning) {
accumulatedText += deltaReasoning
// Track reasoning event
streamEvents.push({
timestamp: Date.now(),
type: 'reasoning_chunk',
data: { content: deltaReasoning },
})
pendingDeltaCount += 1
// Schedule flush for reasoning updates
scheduleFlush()
@@ -728,6 +759,7 @@ export const useChat = () => {
const messageMetadata: Record<string, any> = {
tokenSpeed: useAppState.getState().tokenSpeed,
assistant: currentAssistant,
streamEvents, // Add chronological events
}
if (accumulatedText.includes('<think>') || toolCalls.length > 0) {
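
On the capture side, tool-call deltas arrive across many chunks (the name first, then argument fragments), so an event is recorded only for the chunk that creates toolCalls[index]; later fragments mutate the same entry and produce no event. A hypothetical helper naming that test:

// True only for the delta chunk that starts a brand-new tool call at this index.
const isNewToolCallStart = (index: number | undefined, toolCalls: unknown[]): boolean =>
  index !== undefined && toolCalls[index] === undefined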

View File

@@ -687,6 +687,16 @@ export const postMessageProcessing = async (
toolCallEntry.response = result
toolCallEntry.state = 'ready'
if (updateStreamingUI) updateStreamingUI({ ...message }) // Show result
const streamEvents = (message.metadata?.streamEvents || []) as any[]
streamEvents.push({
timestamp: Date.now(),
type: 'tool_output',
data: { result: result },
})
message.metadata = {
...(message.metadata ?? {}),
streamEvents: streamEvents,
}
builder.addToolMessage(result as ToolResult, toolCall.id)
// Proactive mode: Capture screenshot/snapshot after browser tool execution
@@ -734,6 +744,7 @@ export const postMessageProcessing = async (
if (followUpCompletion) {
let followUpText = ''
const newToolCalls: ChatCompletionMessageToolCall[] = []
const streamEvents = (message.metadata?.streamEvents || []) as any[]
const textContent = message.content.find(
(c) => c.type === ContentType.Text
)
@@ -758,19 +769,56 @@ export const postMessageProcessing = async (
if (textContent?.text) {
if (deltaReasoning) textContent.text.value += deltaReasoning
if (deltaContent) textContent.text.value += deltaContent
if (deltaContent) {
textContent.text.value += deltaContent
followUpText += deltaContent
}
if (deltaContent) followUpText += deltaContent
}
if (deltaReasoning) {
streamEvents.push({
timestamp: Date.now(),
type: 'reasoning_chunk',
data: { content: deltaReasoning },
})
}
const initialToolCallCount = newToolCalls.length
if (chunk.choices[0]?.delta?.tool_calls) {
extractToolCall(chunk, null, newToolCalls)
if (newToolCalls.length > initialToolCallCount) {
// The new tool call is the last element added
streamEvents.push({
timestamp: Date.now(),
type: 'tool_call',
data: { toolCall: newToolCalls[newToolCalls.length - 1] },
})
}
}
// Ensure the metadata is updated before calling updateStreamingUI
message.metadata = {
...(message.metadata ?? {}),
streamEvents: streamEvents,
}
if (updateStreamingUI) updateStreamingUI({ ...message })
if (updateStreamingUI) {
// FIX: Create a new object reference for the content array
// This forces the memoized component to detect the change in the mutated text
const uiMessage: ThreadMessage = {
...message,
content: message.content.map((c) => ({ ...c })), // Shallow copy array and its parts
}
if (textContent?.text) {
textContent.text.value += reasoningProcessor.finalize()
if (updateStreamingUI) updateStreamingUI({ ...message })
updateStreamingUI(uiMessage)
}
}
if (textContent?.text && updateStreamingUI) {
// FIX: Create a new object reference for the content array
// This forces the memoized component to detect the change in the mutated text
const uiMessage: ThreadMessage = {
...message,
content: message.content.map((c) => ({ ...c })), // Shallow copy array and its parts
}
updateStreamingUI(uiMessage)
}
}
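
The closing hunk addresses a memoization pitfall: the streaming loop mutates textContent.text.value in place, so a memo-compared message prop never changes identity and the final text never re-renders. A minimal sketch of the pattern, with ThreadMessage abbreviated to the relevant fields:

type ContentPart = { type: string; text?: { value: string } }
type ThreadMessage = { id: string; content: ContentPart[] }

// Fresh object references on every flush let memoized consumers detect the update.
const toUiMessage = (message: ThreadMessage): ThreadMessage => ({
  ...message,
  content: message.content.map((part) => ({ ...part })),
})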