refactor: simplify reasoning handling in ThreadContent and related hooks

- Remove legacy <think> tag parsing and accumulation of reasoning chunks in the main text buffer.
- Rely exclusively on `streamEvents` to derive reasoning and loading state.
- Update loading logic to account for both tool calls and reasoning events.
- Adjust memo dependencies and return values to avoid stale references.
- Update `useChat` and `completion.ts` to stop mutating the accumulated text with reasoning, keeping the logic purely event‑driven.
- Ensure the ThinkingBlock always renders from the structured steps list, improving consistency and eliminating duplicate content.
Akarshan 2025-10-29 18:58:35 +05:30
parent 37c4a65dbd
commit 0a5e107d0f
No known key found for this signature in database
GPG Key ID: D75C9634A870665F
3 changed files with 27 additions and 71 deletions
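For orientation before the diffs: a minimal sketch of the event-driven derivation this commit moves to. The `StreamEvent` shape, field names, and the standalone helper below are illustrative assumptions for this note, not the project's actual types; only the `reasoning_chunk` event type and the derived flags mirror the code in the diff.

```typescript
// Illustrative sketch only: derive reasoning and loading state purely from
// stream events, never from <think> tags in the text buffer.
type StreamEvent =
  | { timestamp: number; type: 'reasoning_chunk'; content: string }
  | { timestamp: number; type: 'content_chunk'; content: string }

interface DerivedThreadState {
  finalOutputText: string // the text buffer, now final output only
  reasoningSteps: string[] // reasoning recovered from events
  hasReasoningSteps: boolean
  isReasoningActiveLoading: boolean
}

function deriveThreadState(
  text: string,
  streamEvents: StreamEvent[] | undefined,
  hasToolCalls: boolean,
  isStreaming: boolean
): DerivedThreadState {
  const finalOutputText = text.trim()

  // Reasoning is taken exclusively from reasoning_chunk events.
  const reasoningSteps = (streamEvents ?? [])
    .filter((e) => e.type === 'reasoning_chunk')
    .map((e) => e.content)

  // Steps exist if the model emitted reasoning or made tool calls.
  const hasReasoningSteps = hasToolCalls || reasoningSteps.length > 0

  // Loading only while streaming, before any final output, and only when
  // steps (reasoning or tool calls) are actually expected.
  const isReasoningActiveLoading =
    isStreaming && finalOutputText.length === 0 && hasReasoningSteps

  return { finalOutputText, reasoningSteps, hasReasoningSteps, isReasoningActiveLoading }
}
```

The key property is that the text buffer never carries reasoning again: reasoning steps come only from `reasoning_chunk` events, and the loading flag fires only while streaming with no final output and steps actually expected.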

View File

@@ -146,66 +146,11 @@ export const ThreadContent = memo(
       isReasoningActiveLoading,
       hasReasoningSteps,
     } = useMemo(() => {
-      const thinkStartTag = '<think>'
-      const thinkEndTag = '</think>'
-      let currentFinalText = ''
-      let currentReasoning = ''
-      let hasSteps = false
-
-      const firstThinkStart = text.indexOf(thinkStartTag)
-      const lastThinkStart = text.lastIndexOf(thinkStartTag)
-      const lastThinkEnd = text.lastIndexOf(thinkEndTag)
-
-      // Check if there's an unclosed <think> tag
-      const hasOpenThink = lastThinkStart > lastThinkEnd
-
-      if (firstThinkStart === -1) {
-        // No <think> tags at all - everything is final output
-        currentFinalText = text
-      } else if (hasOpenThink && isStreamingThisThread) {
-        // CASE 1: There's an open <think> tag during streaming
-        // Everything from FIRST <think> onward is reasoning
-        hasSteps = true
-
-        // Text before first <think> is final output
-        currentFinalText = text.substring(0, firstThinkStart)
-
-        // Everything from first <think> onward is reasoning
-        const reasoningText = text.substring(firstThinkStart)
-
-        // Extract content from all <think> blocks (both closed and open)
-        const reasoningRegex = /<think>([\s\S]*?)(?:<\/think>|$)/g
-        const matches = [...reasoningText.matchAll(reasoningRegex)]
-        const reasoningParts = matches.map((match) => cleanReasoning(match[1]))
-        currentReasoning = reasoningParts.join('\n\n')
-      } else {
-        // CASE 2: All <think> tags are closed
-        // Extract reasoning from inside tags, everything else is final output
-        hasSteps = true
-
-        const reasoningRegex = /<think>[\s\S]*?<\/think>/g
-        const matches = [...text.matchAll(reasoningRegex)]
-
-        let lastIndex = 0
-
-        // Build final output from text between/outside <think> blocks
-        for (const match of matches) {
-          currentFinalText += text.substring(lastIndex, match.index)
-          lastIndex = match.index + match[0].length
-        }
-
-        // Add remaining text after last </think>
-        currentFinalText += text.substring(lastIndex)
-
-        // Extract reasoning content
-        const reasoningParts = matches.map((match) => {
-          const content = match[0].replace(/<think>|<\/think>/g, '')
-          return cleanReasoning(content)
-        })
-        currentReasoning = reasoningParts.join('\n\n')
-      }
-
-      // Check for tool calls
+      // With the streaming functions updated, the text variable now only contains the final output.
+      const currentFinalText = text.trim()
+      const currentReasoning = '' // Reasoning is now only derived from streamEvents/allSteps
+
+      // Check for tool calls or reasoning events in metadata to determine steps/loading
       const isToolCallsPresent = !!(
         item.metadata &&
         'tool_calls' in item.metadata &&
@@ -213,19 +158,29 @@ export const ThreadContent = memo(
         item.metadata.tool_calls.length > 0
       )
 
-      hasSteps = hasSteps || isToolCallsPresent
+      // Check for any reasoning chunks in the streamEvents
+      const hasReasoningEvents = !!(
+        item.metadata &&
+        'streamEvents' in item.metadata &&
+        Array.isArray(item.metadata.streamEvents) &&
+        item.metadata.streamEvents.some(
+          (e: StreamEvent) => e.type === 'reasoning_chunk'
+        )
+      )
+
+      const hasSteps = isToolCallsPresent || hasReasoningEvents
 
-      // Loading if streaming and no final output yet
+      // Loading if streaming, no final output yet, but we expect steps (reasoning or tool calls)
       const loading =
-        isStreamingThisThread && currentFinalText.trim().length === 0
+        isStreamingThisThread && currentFinalText.length === 0 && hasSteps
 
       return {
-        finalOutputText: currentFinalText.trim(),
+        finalOutputText: currentFinalText,
         streamedReasoningText: currentReasoning,
         isReasoningActiveLoading: loading,
        hasReasoningSteps: hasSteps,
      }
-    }, [item.content, isStreamingThisThread, item.metadata, text])
+    }, [item.metadata, text, isStreamingThisThread])
 
     const isToolCalls =
       item.metadata &&
@@ -516,7 +471,6 @@ export const ThreadContent = memo(
     // END: Constructing allSteps
     // ====================================================================
 
-    // FIX: Determine which text prop to pass to ThinkingBlock
     // If we have streamEvents, rely on 'steps' and pass an empty text buffer.
     const streamingTextBuffer = useMemo(() => {
       const streamEvents = item.metadata?.streamEvents
@@ -528,9 +482,11 @@ export const ThreadContent = memo(
         return ''
       }
 
-      // Otherwise, rely on the raw text buffer for rendering (used during initial stream fallback)
-      return streamedReasoningText
-    }, [item.metadata?.streamEvents, streamedReasoningText]) // Use the object reference for dependency array
+      // Since we no longer concatenate reasoning to the main text,
+      // the only time we'd rely on text buffer is if streamEvents fails to load.
+      // For robustness, we can simply return an empty string to force use of 'steps'.
+      return ''
+    }, [item.metadata?.streamEvents]) // Use the object reference for dependency array
 
     // ====================================================================
     // Determine if we should show the thinking block

View File

@@ -671,7 +671,7 @@ export const useChat = () => {
               const deltaReasoning =
                 reasoningProcessor.processReasoningChunk(part)
               if (deltaReasoning) {
-                accumulatedText += deltaReasoning
+                // accumulatedText += deltaReasoning
                 // Track reasoning event
                 streamEvents.push({
                   timestamp: Date.now(),
@@ -705,7 +705,7 @@ export const useChat = () => {
       // Only finalize and flush if not aborted
       if (!abortController.signal.aborted) {
         // Finalize reasoning (close any open think tags)
-        accumulatedText += reasoningProcessor.finalize()
+        // accumulatedText += reasoningProcessor.finalize()
         // Ensure any pending buffered content is rendered at the end
         flushIfPending()
       }

View File

@@ -777,7 +777,7 @@ export const postMessageProcessing = async (
         const deltaContent = chunk.choices[0]?.delta?.content || ''
         if (textContent?.text) {
-          if (deltaReasoning) textContent.text.value += deltaReasoning
+          // if (deltaReasoning) textContent.text.value += deltaReasoning
           if (deltaContent) {
             textContent.text.value += deltaContent
             followUpText += deltaContent