feat: Refactor reasoning/tool parsing and fix infinite tool loop prevention

This commit significantly refactors how assistant message content containing reasoning steps (<think> blocks) and tool calls is parsed and split into final output text and streamed reasoning text in `ThreadContent.tsx`. It introduces new logic to correctly handle multiple, open, or closed `<think>` tags, ensuring that: 1. All text outside of `<think>...</think>` tags is correctly extracted as final output text. 2. Content inside all `<think>` tags is aggregated as streamed reasoning text. 3. The message correctly determines if reasoning is actively loading during a stream. Additionally, this commit: * **Fixes infinite tool loop prevention:** The global `toolStepCounter` in `completion.ts` is replaced with an explicit `currentStepCount` parameter passed recursively in `postMessageProcessing`. This ensures that the tool step limit is correctly enforced per message chain, preventing potential race conditions and correctly resolving the chain. * **Fixes large step content rendering:** Limits the content of a single thinking step in `ThinkingBlock.tsx` to 1000 characters to prevent UI slowdowns from rendering extremely large JSON or text outputs.
2025-10-22 20:16:20 +05:30 · 2025-10-22 20:16:20 +05:30 · d83b569f17
commit d83b569f17
parent 6e46988b03
3 changed files with 173 additions and 80 deletions
--- a/web-app/src/containers/ThinkingBlock.tsx
+++ b/web-app/src/containers/ThinkingBlock.tsx
@ -156,7 +156,7 @@ const ThinkingBlock = ({
          <div className="mt-1">
            <RenderMarkdown
              isWrapping={true}
-              content={'```json\n' + step.content + '\n```'}
+              content={step.content.substring(0, 1000)}
            />
          </div>
        </>
--- a/web-app/src/containers/ThreadContent.tsx
+++ b/web-app/src/containers/ThreadContent.tsx
@ -140,44 +140,98 @@ export const ThreadContent = memo(
      return { files: [], cleanPrompt: text }
    }, [text, item.role])
-    const { reasoningSegment, textSegment } = useMemo(() => {
+    const {
-      let reasoningSegment = undefined
+      finalOutputText,
-      let textSegment = text
+      streamedReasoningText,
-
+      isReasoningActiveLoading,
-      // Check for completed think tag format
+      hasReasoningSteps,
-      console.log(textSegment)
+    } = useMemo(() => {
      const thinkStartTag = '<think>'
      const thinkEndTag = '</think>'
      let currentFinalText = ''
      let currentReasoning = ''
      let hasSteps = false
-      const firstThinkIndex = text.indexOf(thinkStartTag)
+      const firstThinkStart = text.indexOf(thinkStartTag)
-      const lastThinkEndIndex = text.lastIndexOf(thinkEndTag)
+      const lastThinkStart = text.lastIndexOf(thinkStartTag)
      const lastThinkEnd = text.lastIndexOf(thinkEndTag)
-      if (firstThinkIndex !== -1 && lastThinkEndIndex > firstThinkIndex) {
+      // Check if there's an unclosed <think> tag
-        // If multiple <think>...</think> blocks exist sequentially, we capture the entire span
+      const hasOpenThink = lastThinkStart > lastThinkEnd
        // from the start of the first tag to the end of the last tag.
        const splitIndex = lastThinkEndIndex + thinkEndTag.length
-        reasoningSegment = text.slice(firstThinkIndex, splitIndex)
+      if (firstThinkStart === -1) {
-        textSegment = text.slice(splitIndex).trim()
+        // No <think> tags at all - everything is final output
        currentFinalText = text
      } else if (hasOpenThink && isStreamingThisThread) {
        // CASE 1: There's an open <think> tag during streaming
        // Everything from FIRST <think> onward is reasoning
        hasSteps = true
-        return { reasoningSegment, textSegment }
+        // Text before first <think> is final output
-      }
+        currentFinalText = text.substring(0, firstThinkStart)
      // If streaming, and we see the opening tag, the entire message is reasoningSegment
      const hasThinkTagStart =
        text.includes(thinkStartTag) && !text.includes(thinkEndTag)
-      if (hasThinkTagStart) {
+        // Everything from first <think> onward is reasoning
-        reasoningSegment = text
+        const reasoningText = text.substring(firstThinkStart)
-        textSegment = ''
+
-        return { reasoningSegment, textSegment }
+        // Extract content from all <think> blocks (both closed and open)
        const reasoningRegex = /<think>([\s\S]*?)(?:<\/think>|$)/g
        const matches = [...reasoningText.matchAll(reasoningRegex)]
        const reasoningParts = matches.map((match) => cleanReasoning(match[1]))
        currentReasoning = reasoningParts.join('\n\n')
      } else {
        // CASE 2: All <think> tags are closed
        // Extract reasoning from inside tags, everything else is final output
        hasSteps = true
        const reasoningRegex = /<think>[\s\S]*?<\/think>/g
        const matches = [...text.matchAll(reasoningRegex)]
        let lastIndex = 0
        // Build final output from text between/outside <think> blocks
        for (const match of matches) {
          currentFinalText += text.substring(lastIndex, match.index)
          lastIndex = match.index + match[0].length
        }
        // Add remaining text after last </think>
        currentFinalText += text.substring(lastIndex)
        // Extract reasoning content
        const reasoningParts = matches.map((match) => {
          const content = match[0].replace(/<think>|<\/think>/g, '')
          return cleanReasoning(content)
        })
        currentReasoning = reasoningParts.join('\n\n')
      }
-      // Default: No reasoning found, or it's a message composed entirely of final text.
+      // Check for tool calls
-      return { reasoningSegment: undefined, textSegment: text }
+      const isToolCallsPresent = !!(
-    }, [text])
+        item.metadata &&
        'tool_calls' in item.metadata &&
        Array.isArray(item.metadata.tool_calls) &&
        item.metadata.tool_calls.length > 0
      )
-    // Check if reasoning segment is actually present (i.e., non-empty string)
+      hasSteps = hasSteps || isToolCallsPresent
-    const hasReasoning = !!reasoningSegment
+
      // Loading if streaming and no final output yet
      const loading =
        isStreamingThisThread && currentFinalText.trim().length === 0
      return {
        finalOutputText: currentFinalText.trim(),
        streamedReasoningText: currentReasoning,
        isReasoningActiveLoading: loading,
        hasReasoningSteps: hasSteps,
      }
    }, [item.content, isStreamingThisThread, item.metadata, text])
    const isToolCalls =
      item.metadata &&
      'tool_calls' in item.metadata &&
      Array.isArray(item.metadata.tool_calls) &&
      item.metadata.tool_calls.length
    const getMessages = useMessages((state) => state.getMessages)
    const deleteMessage = useMessages((state) => state.deleteMessage)
@ -249,12 +303,6 @@ export const ThreadContent = memo(
      }
    }, [deleteMessage, getMessages, item])
    const isToolCalls =
      item.metadata &&
      'tool_calls' in item.metadata &&
      Array.isArray(item.metadata.tool_calls) &&
      item.metadata.tool_calls.length
    const assistant = item.metadata?.assistant as
      | { avatar?: React.ReactNode; name?: React.ReactNode }
      | undefined
@ -273,6 +321,8 @@ export const ThreadContent = memo(
      const streamEvents = (item.metadata?.streamEvents as StreamEvent[]) || []
      const toolCalls = (item.metadata?.tool_calls || []) as ToolCall[]
      const isMessageFinalized = !isStreamingThisThread
      if (streamEvents.length > 0) {
        // CHRONOLOGICAL PATH: Use streamEvents for true temporal order
        let reasoningBuffer = ''
@ -366,10 +416,10 @@ export const ThreadContent = memo(
            })
          })
        }
-      } else {
+      } else if (isMessageFinalized) {
-        console.debug('Fallback mode!!!!')
+        // FALLBACK PATH: No streamEvents - use split text for content construction
-        // FALLBACK PATH: No streamEvents - use old paragraph-splitting logic
+
-        const rawReasoningContent = cleanReasoning(reasoningSegment || '')
+        const rawReasoningContent = streamedReasoningText || ''
        const reasoningParagraphs = rawReasoningContent
          ? rawReasoningContent
              .split(/\n\s*\n/)
@ -442,9 +492,9 @@ export const ThreadContent = memo(
      const totalTime = item.metadata?.totalThinkingTime as number | undefined
      const lastStepType = steps[steps.length - 1]?.type
-      if (!isStreamingThisThread && (hasReasoning || isToolCalls)) {
+      if (!isStreamingThisThread && hasReasoningSteps) {
        const endsInToolOutputWithoutFinalText =
-          lastStepType === 'tool_output' && textSegment.length === 0
+          lastStepType === 'tool_output' && finalOutputText.length === 0
        if (!endsInToolOutputWithoutFinalText) {
          steps.push({
@ -458,22 +508,34 @@ export const ThreadContent = memo(
      return steps
    }, [
      item,
      reasoningSegment,
      isStreamingThisThread,
-      hasReasoning,
+      hasReasoningSteps,
-      isToolCalls,
+      finalOutputText,
-      textSegment,
+      streamedReasoningText,
    ])
    // END: Constructing allSteps
-    // Determine if reasoning phase is actively loading
+    // ====================================================================
-    // Loading is true only if streaming is happening AND we haven't started outputting final text yet.
+    // FIX: Determine which text prop to pass to ThinkingBlock
-    const isReasoningActiveLoading =
+    // If we have streamEvents, rely on 'steps' and pass an empty text buffer.
-      isStreamingThisThread && textSegment.length === 0
+    const streamingTextBuffer = useMemo(() => {
      const streamEvents = item.metadata?.streamEvents
-    // Determine if we should show the thinking block (has reasoning OR tool calls OR currently loading reasoning)
+      // Check if streamEvents exists AND is an array AND has a length greater than 0
      if (Array.isArray(streamEvents) && streamEvents.length > 0) {
        // We are using the chronological path (allSteps) for rendering
        // Return empty string to disable the ThinkingBlock's raw text buffer
        return ''
      }
      // Otherwise, rely on the raw text buffer for rendering (used during initial stream fallback)
      return streamedReasoningText
    }, [item.metadata?.streamEvents, streamedReasoningText]) // Use the object reference for dependency array
    // ====================================================================
    // Determine if we should show the thinking block
    const shouldShowThinkingBlock =
-      hasReasoning || isToolCalls || isReasoningActiveLoading
+      hasReasoningSteps || isToolCalls || isReasoningActiveLoading
    return (
      <Fragment>
@ -614,19 +676,25 @@ export const ThreadContent = memo(
              <ThinkingBlock
                id={
                  item.isLastMessage
-                    ? `${item.thread_id}-last-${(reasoningSegment || text).slice(0, 50).replace(/\s/g, '').slice(-10)}`
+                    ? `${item.thread_id}-last-${(streamingTextBuffer || text).slice(0, 50).replace(/\s/g, '').slice(-10)}`
                    : `${item.thread_id}-${item.index ?? item.id}`
                }
-                text={reasoningSegment || ''}
+                // Pass the safe buffer
                text={streamingTextBuffer}
                steps={allSteps}
-                loading={isReasoningActiveLoading} // Req 2: False if textSegment is starting
+                loading={isReasoningActiveLoading}
                duration={
                  item.metadata?.totalThinkingTime as number | undefined
                }
              />
            )}
-            <RenderMarkdown content={textSegment} components={linkComponents} />
+            {!isReasoningActiveLoading && finalOutputText.length > 0 && (
              <RenderMarkdown
                content={finalOutputText}
                components={linkComponents}
              />
            )}
            {!isToolCalls && (
              <div className="flex items-center gap-2 text-main-view-fg/60 text-xs">
--- a/web-app/src/lib/completion.ts
+++ b/web-app/src/lib/completion.ts
@ -407,9 +407,6 @@ export const extractToolCall = (
  return calls
 }
 // Keep track of total tool steps to prevent infinite loops
 let toolStepCounter = 0
 /**
 * Helper function to check if a tool call is a browser MCP tool
 * @param toolName - The name of the tool
@ -533,6 +530,12 @@ const filterOldProactiveScreenshots = (builder: CompletionMessagesBuilder) => {
 * @param approvedTools
 * @param showModal
 * @param allowAllMCPPermissions
 * @param thread
 * @param provider
 * @param tools
 * @param updateStreamingUI
 * @param maxToolSteps
 * @param currentStepCount - Internal counter for recursive calls (do not set manually)
 * @param isProactiveMode
 */
 export const postMessageProcessing = async (
@ -552,16 +555,20 @@ export const postMessageProcessing = async (
  tools: MCPTool[] = [],
  updateStreamingUI?: (content: ThreadMessage) => void,
  maxToolSteps: number = 20,
  currentStepCount: number = 0,
  isProactiveMode: boolean = false
 ): Promise<ThreadMessage> => {
  // Reset counter at the start of a new message processing chain
  if (toolStepCounter === 0) {
    toolStepCounter = 0
  }
  // Handle completed tool calls
  if (calls.length > 0) {
-    toolStepCounter++
+    // Check limit BEFORE processing
    if (currentStepCount >= maxToolSteps) {
      console.warn(
        `Reached maximum tool steps (${maxToolSteps}), stopping chain to prevent infinite loop`
      )
      return message
    }
    const nextStepCount = currentStepCount + 1
    // Fetch RAG tool names from RAG service
    let ragToolNames = new Set<string>()
@ -687,6 +694,7 @@ export const postMessageProcessing = async (
      toolCallEntry.response = result
      toolCallEntry.state = 'ready'
      if (updateStreamingUI) updateStreamingUI({ ...message }) // Show result
      const streamEvents = (message.metadata?.streamEvents || []) as any[]
      streamEvents.push({
        timestamp: Date.now(),
@ -701,13 +709,16 @@ export const postMessageProcessing = async (
      // Proactive mode: Capture screenshot/snapshot after browser tool execution
      if (isProactiveMode && isBrowserTool && !abortController.signal.aborted) {
-        console.log('Proactive mode: Capturing screenshots after browser tool call')
+        console.log(
          'Proactive mode: Capturing screenshots after browser tool call'
        )
        // Filter out old screenshots before adding new ones
        filterOldProactiveScreenshots(builder)
        // Capture new screenshots
-        const proactiveScreenshots = await captureProactiveScreenshots(abortController)
+        const proactiveScreenshots =
          await captureProactiveScreenshots(abortController)
        // Add proactive screenshots to builder
        for (const screenshot of proactiveScreenshots) {
@ -722,12 +733,8 @@ export const postMessageProcessing = async (
      // update message metadata
    }
-    if (
+    // Process follow-up completion if conditions are met
-      thread &&
+    if (thread && provider && !abortController.signal.aborted) {
      provider &&
      !abortController.signal.aborted &&
      toolStepCounter < maxToolSteps
    ) {
      try {
        const messagesWithToolResults = builder.getMessages()
@ -750,6 +757,7 @@ export const postMessageProcessing = async (
          )
          if (isCompletionResponse(followUpCompletion)) {
            // Handle non-streaming response
            const choice = followUpCompletion.choices[0]
            const content = choice?.message?.content
            if (content) followUpText = content as string
@ -759,6 +767,7 @@ export const postMessageProcessing = async (
            if (textContent?.text) textContent.text.value += followUpText
            if (updateStreamingUI) updateStreamingUI({ ...message })
          } else {
            // Handle streaming response
            const reasoningProcessor = new ReasoningProcessor()
            for await (const chunk of followUpCompletion) {
              if (abortController.signal.aborted) break
@ -772,9 +781,9 @@ export const postMessageProcessing = async (
                if (deltaContent) {
                  textContent.text.value += deltaContent
                  followUpText += deltaContent
                  console.log(`delta content from followup:\n${deltaContent}`)
                }
              }
              if (deltaReasoning) {
                streamEvents.push({
                  timestamp: Date.now(),
@ -782,6 +791,7 @@ export const postMessageProcessing = async (
                  data: { content: deltaReasoning },
                })
              }
              const initialToolCallCount = newToolCalls.length
              if (chunk.choices[0]?.delta?.tool_calls) {
@ -795,6 +805,7 @@ export const postMessageProcessing = async (
                  })
                }
              }
              // Ensure the metadata is updated before calling updateStreamingUI
              message.metadata = {
                ...(message.metadata ?? {}),
@ -802,26 +813,27 @@ export const postMessageProcessing = async (
              }
              if (updateStreamingUI) {
-                // FIX: Create a new object reference for the content array
+                // Create a new object reference for the content array
                // This forces the memoized component to detect the change in the mutated text
                const uiMessage: ThreadMessage = {
                  ...message,
-                  content: message.content.map((c) => ({ ...c })), // Shallow copy array and its parts
+                  content: message.content.map((c) => ({ ...c })),
                }
                updateStreamingUI(uiMessage)
              }
            }
            if (textContent?.text && updateStreamingUI) {
-              // FIX: Create a new object reference for the content array
+              // Final UI update after streaming completes
              // This forces the memoized component to detect the change in the mutated text
              const uiMessage: ThreadMessage = {
                ...message,
-                content: message.content.map((c) => ({ ...c })), // Shallow copy array and its parts
+                content: message.content.map((c) => ({ ...c })),
              }
              updateStreamingUI(uiMessage)
            }
          }
          // Recursively process new tool calls if any
          if (newToolCalls.length > 0) {
            builder.addAssistantMessage(followUpText, undefined, newToolCalls)
            await postMessageProcessing(
@ -837,6 +849,7 @@ export const postMessageProcessing = async (
              tools,
              updateStreamingUI,
              maxToolSteps,
              nextStepCount, // Pass the incremented step count
              isProactiveMode
            )
          }
@ -846,11 +859,23 @@ export const postMessageProcessing = async (
          'Failed to get follow-up completion after tool execution:',
          String(error)
        )
        // Optionally add error to message metadata for UI display
        const streamEvents = (message.metadata?.streamEvents || []) as any[]
        streamEvents.push({
          timestamp: Date.now(),
          type: 'error',
          data: {
            message: 'Follow-up completion failed',
            error: String(error),
          },
        })
        message.metadata = {
          ...(message.metadata ?? {}),
          streamEvents: streamEvents,
        }
      }
    }
  }
  // Reset counter when the chain is fully resolved
  toolStepCounter = 0
  return message
 }