feat: Refactor reasoning/tool parsing and fix infinite tool loop prevention
This commit significantly refactors how assistant message content containing reasoning steps (`<think>` blocks) and tool calls is parsed and split into final output text and streamed reasoning text in `ThreadContent.tsx`. It introduces new logic to correctly handle multiple, open, or closed `<think>` tags, ensuring that:

1. All text outside of `<think>...</think>` tags is correctly extracted as final output text.
2. Content inside all `<think>` tags is aggregated as streamed reasoning text.
3. The message correctly determines if reasoning is actively loading during a stream.

Additionally, this commit:

* **Fixes infinite tool loop prevention:** The global `toolStepCounter` in `completion.ts` is replaced with an explicit `currentStepCount` parameter passed recursively in `postMessageProcessing`. This ensures that the tool step limit is correctly enforced per message chain, preventing potential race conditions and correctly resolving the chain.
* **Fixes large step content rendering:** Limits the content of a single thinking step in `ThinkingBlock.tsx` to 1000 characters to prevent UI slowdowns from rendering extremely large JSON or text outputs.
parent 6e46988b03
commit d83b569f17
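Two minimal sketches of the ideas described above, before the diff itself. Both are illustrative TypeScript only, not the code from this commit: the helper names (`splitThinkContent`, `runToolChain`, `executeStep`) are made up for the example, and the real implementations in `ThreadContent.tsx` and `completion.ts` also track loading flags, tool-call metadata, and streaming UI updates that are omitted here.

The first sketch shows the splitting rule: content inside `<think>` tags (closed or still open mid-stream) becomes streamed reasoning text, and everything outside becomes final output text.

```ts
// Illustrative helper only — the real logic lives in a useMemo in ThreadContent.tsx.
export function splitThinkContent(text: string): {
  finalOutputText: string
  streamedReasoningText: string
} {
  // No <think> tag at all: the whole message is final output.
  if (!text.includes('<think>')) {
    return { finalOutputText: text.trim(), streamedReasoningText: '' }
  }

  // Capture the body of every <think> block; a block that is still open
  // (mid-stream) matches through to the end of the string.
  const blocks = [...text.matchAll(/<think>([\s\S]*?)(?:<\/think>|$)/g)]
  const streamedReasoningText = blocks.map((m) => m[1].trim()).join('\n\n')

  // Everything outside the <think> blocks is final output.
  const finalOutputText = text
    .replace(/<think>[\s\S]*?(?:<\/think>|$)/g, '')
    .trim()

  return { finalOutputText, streamedReasoningText }
}
```

The second sketch shows the loop-prevention pattern: the step count is passed down the recursion as a parameter instead of being tracked in a mutable module-level counter, so each message chain is limited independently and concurrent chains cannot interfere with each other.

```ts
// Placeholder names and types — a sketch of the pattern, not the completion.ts API.
type PendingToolCall = { name: string; args: unknown }

async function runToolChain(
  calls: PendingToolCall[],
  executeStep: (calls: PendingToolCall[]) => Promise<PendingToolCall[]>,
  maxToolSteps = 20,
  currentStepCount = 0 // passed explicitly on each recursive call
): Promise<void> {
  if (calls.length === 0) return

  // Check the limit before doing any work for this step.
  if (currentStepCount >= maxToolSteps) {
    console.warn(`Reached maximum tool steps (${maxToolSteps}), stopping chain`)
    return
  }

  // Run this step; the follow-up completion may request further tool calls.
  const nextCalls = await executeStep(calls)

  // Recurse with an incremented count so the limit applies per chain.
  await runToolChain(nextCalls, executeStep, maxToolSteps, currentStepCount + 1)
}
```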
ThinkingBlock.tsx

@@ -156,7 +156,7 @@ const ThinkingBlock = ({
<div className="mt-1">
<RenderMarkdown
isWrapping={true}
content={'```json\n' + step.content + '\n```'}
content={step.content.substring(0, 1000)}
/>
</div>
</>
ThreadContent.tsx

@@ -140,44 +140,98 @@ export const ThreadContent = memo(
return { files: [], cleanPrompt: text }
}, [text, item.role])

const { reasoningSegment, textSegment } = useMemo(() => {
let reasoningSegment = undefined
let textSegment = text

// Check for completed think tag format
console.log(textSegment)
const {
finalOutputText,
streamedReasoningText,
isReasoningActiveLoading,
hasReasoningSteps,
} = useMemo(() => {
const thinkStartTag = '<think>'
const thinkEndTag = '</think>'
let currentFinalText = ''
let currentReasoning = ''
let hasSteps = false

const firstThinkIndex = text.indexOf(thinkStartTag)
const lastThinkEndIndex = text.lastIndexOf(thinkEndTag)
const firstThinkStart = text.indexOf(thinkStartTag)
const lastThinkStart = text.lastIndexOf(thinkStartTag)
const lastThinkEnd = text.lastIndexOf(thinkEndTag)

if (firstThinkIndex !== -1 && lastThinkEndIndex > firstThinkIndex) {
// If multiple <think>...</think> blocks exist sequentially, we capture the entire span
// from the start of the first tag to the end of the last tag.
const splitIndex = lastThinkEndIndex + thinkEndTag.length
// Check if there's an unclosed <think> tag
const hasOpenThink = lastThinkStart > lastThinkEnd

reasoningSegment = text.slice(firstThinkIndex, splitIndex)
textSegment = text.slice(splitIndex).trim()
if (firstThinkStart === -1) {
// No <think> tags at all - everything is final output
currentFinalText = text
} else if (hasOpenThink && isStreamingThisThread) {
// CASE 1: There's an open <think> tag during streaming
// Everything from FIRST <think> onward is reasoning
hasSteps = true

return { reasoningSegment, textSegment }
}
// If streaming, and we see the opening tag, the entire message is reasoningSegment
const hasThinkTagStart =
text.includes(thinkStartTag) && !text.includes(thinkEndTag)
// Text before first <think> is final output
currentFinalText = text.substring(0, firstThinkStart)

if (hasThinkTagStart) {
reasoningSegment = text
textSegment = ''
return { reasoningSegment, textSegment }
// Everything from first <think> onward is reasoning
const reasoningText = text.substring(firstThinkStart)

// Extract content from all <think> blocks (both closed and open)
const reasoningRegex = /<think>([\s\S]*?)(?:<\/think>|$)/g
const matches = [...reasoningText.matchAll(reasoningRegex)]
const reasoningParts = matches.map((match) => cleanReasoning(match[1]))
currentReasoning = reasoningParts.join('\n\n')
} else {
// CASE 2: All <think> tags are closed
// Extract reasoning from inside tags, everything else is final output
hasSteps = true

const reasoningRegex = /<think>[\s\S]*?<\/think>/g
const matches = [...text.matchAll(reasoningRegex)]

let lastIndex = 0

// Build final output from text between/outside <think> blocks
for (const match of matches) {
currentFinalText += text.substring(lastIndex, match.index)
lastIndex = match.index + match[0].length
}

// Add remaining text after last </think>
currentFinalText += text.substring(lastIndex)

// Extract reasoning content
const reasoningParts = matches.map((match) => {
const content = match[0].replace(/<think>|<\/think>/g, '')
return cleanReasoning(content)
})
currentReasoning = reasoningParts.join('\n\n')
}

// Default: No reasoning found, or it's a message composed entirely of final text.
return { reasoningSegment: undefined, textSegment: text }
}, [text])
// Check for tool calls
const isToolCallsPresent = !!(
item.metadata &&
'tool_calls' in item.metadata &&
Array.isArray(item.metadata.tool_calls) &&
item.metadata.tool_calls.length > 0
)

// Check if reasoning segment is actually present (i.e., non-empty string)
const hasReasoning = !!reasoningSegment
hasSteps = hasSteps || isToolCallsPresent

// Loading if streaming and no final output yet
const loading =
isStreamingThisThread && currentFinalText.trim().length === 0

return {
finalOutputText: currentFinalText.trim(),
streamedReasoningText: currentReasoning,
isReasoningActiveLoading: loading,
hasReasoningSteps: hasSteps,
}
}, [item.content, isStreamingThisThread, item.metadata, text])

const isToolCalls =
item.metadata &&
'tool_calls' in item.metadata &&
Array.isArray(item.metadata.tool_calls) &&
item.metadata.tool_calls.length

const getMessages = useMessages((state) => state.getMessages)
const deleteMessage = useMessages((state) => state.deleteMessage)

@@ -249,12 +303,6 @@ export const ThreadContent = memo(
}
}, [deleteMessage, getMessages, item])

const isToolCalls =
item.metadata &&
'tool_calls' in item.metadata &&
Array.isArray(item.metadata.tool_calls) &&
item.metadata.tool_calls.length

const assistant = item.metadata?.assistant as
| { avatar?: React.ReactNode; name?: React.ReactNode }
| undefined

@@ -273,6 +321,8 @@ export const ThreadContent = memo(
const streamEvents = (item.metadata?.streamEvents as StreamEvent[]) || []
const toolCalls = (item.metadata?.tool_calls || []) as ToolCall[]

const isMessageFinalized = !isStreamingThisThread

if (streamEvents.length > 0) {
// CHRONOLOGICAL PATH: Use streamEvents for true temporal order
let reasoningBuffer = ''

@@ -366,10 +416,10 @@ export const ThreadContent = memo(
})
})
}
} else {
console.debug('Fallback mode!!!!')
// FALLBACK PATH: No streamEvents - use old paragraph-splitting logic
const rawReasoningContent = cleanReasoning(reasoningSegment || '')
} else if (isMessageFinalized) {
// FALLBACK PATH: No streamEvents - use split text for content construction

const rawReasoningContent = streamedReasoningText || ''
const reasoningParagraphs = rawReasoningContent
? rawReasoningContent
.split(/\n\s*\n/)

@@ -442,9 +492,9 @@ export const ThreadContent = memo(
const totalTime = item.metadata?.totalThinkingTime as number | undefined
const lastStepType = steps[steps.length - 1]?.type

if (!isStreamingThisThread && (hasReasoning || isToolCalls)) {
if (!isStreamingThisThread && hasReasoningSteps) {
const endsInToolOutputWithoutFinalText =
lastStepType === 'tool_output' && textSegment.length === 0
lastStepType === 'tool_output' && finalOutputText.length === 0

if (!endsInToolOutputWithoutFinalText) {
steps.push({

@@ -458,22 +508,34 @@ export const ThreadContent = memo(
return steps
}, [
item,
reasoningSegment,
isStreamingThisThread,
hasReasoning,
isToolCalls,
textSegment,
hasReasoningSteps,
finalOutputText,
streamedReasoningText,
])
// END: Constructing allSteps

// Determine if reasoning phase is actively loading
// Loading is true only if streaming is happening AND we haven't started outputting final text yet.
const isReasoningActiveLoading =
isStreamingThisThread && textSegment.length === 0
// ====================================================================
// FIX: Determine which text prop to pass to ThinkingBlock
// If we have streamEvents, rely on 'steps' and pass an empty text buffer.
const streamingTextBuffer = useMemo(() => {
const streamEvents = item.metadata?.streamEvents

// Determine if we should show the thinking block (has reasoning OR tool calls OR currently loading reasoning)
// Check if streamEvents exists AND is an array AND has a length greater than 0
if (Array.isArray(streamEvents) && streamEvents.length > 0) {
// We are using the chronological path (allSteps) for rendering
// Return empty string to disable the ThinkingBlock's raw text buffer
return ''
}

// Otherwise, rely on the raw text buffer for rendering (used during initial stream fallback)
return streamedReasoningText
}, [item.metadata?.streamEvents, streamedReasoningText]) // Use the object reference for dependency array
// ====================================================================

// Determine if we should show the thinking block
const shouldShowThinkingBlock =
hasReasoning || isToolCalls || isReasoningActiveLoading
hasReasoningSteps || isToolCalls || isReasoningActiveLoading

return (
<Fragment>

@@ -614,19 +676,25 @@ export const ThreadContent = memo(
<ThinkingBlock
id={
item.isLastMessage
? `${item.thread_id}-last-${(reasoningSegment || text).slice(0, 50).replace(/\s/g, '').slice(-10)}`
? `${item.thread_id}-last-${(streamingTextBuffer || text).slice(0, 50).replace(/\s/g, '').slice(-10)}`
: `${item.thread_id}-${item.index ?? item.id}`
}
text={reasoningSegment || ''}
// Pass the safe buffer
text={streamingTextBuffer}
steps={allSteps}
loading={isReasoningActiveLoading} // Req 2: False if textSegment is starting
loading={isReasoningActiveLoading}
duration={
item.metadata?.totalThinkingTime as number | undefined
}
/>
)}

<RenderMarkdown content={textSegment} components={linkComponents} />
{!isReasoningActiveLoading && finalOutputText.length > 0 && (
<RenderMarkdown
content={finalOutputText}
components={linkComponents}
/>
)}

{!isToolCalls && (
<div className="flex items-center gap-2 text-main-view-fg/60 text-xs">
completion.ts

@@ -407,9 +407,6 @@ export const extractToolCall = (
return calls
}

// Keep track of total tool steps to prevent infinite loops
let toolStepCounter = 0

/**
* Helper function to check if a tool call is a browser MCP tool
* @param toolName - The name of the tool

@@ -533,6 +530,12 @@ const filterOldProactiveScreenshots = (builder: CompletionMessagesBuilder) => {
* @param approvedTools
* @param showModal
* @param allowAllMCPPermissions
* @param thread
* @param provider
* @param tools
* @param updateStreamingUI
* @param maxToolSteps
* @param currentStepCount - Internal counter for recursive calls (do not set manually)
* @param isProactiveMode
*/
export const postMessageProcessing = async (

@@ -552,16 +555,20 @@ export const postMessageProcessing = async (
tools: MCPTool[] = [],
updateStreamingUI?: (content: ThreadMessage) => void,
maxToolSteps: number = 20,
currentStepCount: number = 0,
isProactiveMode: boolean = false
): Promise<ThreadMessage> => {
// Reset counter at the start of a new message processing chain
if (toolStepCounter === 0) {
toolStepCounter = 0
}

// Handle completed tool calls
if (calls.length > 0) {
toolStepCounter++
// Check limit BEFORE processing
if (currentStepCount >= maxToolSteps) {
console.warn(
`Reached maximum tool steps (${maxToolSteps}), stopping chain to prevent infinite loop`
)
return message
}

const nextStepCount = currentStepCount + 1

// Fetch RAG tool names from RAG service
let ragToolNames = new Set<string>()

@@ -687,6 +694,7 @@ export const postMessageProcessing = async (
toolCallEntry.response = result
toolCallEntry.state = 'ready'
if (updateStreamingUI) updateStreamingUI({ ...message }) // Show result

const streamEvents = (message.metadata?.streamEvents || []) as any[]
streamEvents.push({
timestamp: Date.now(),

@@ -701,13 +709,16 @@ export const postMessageProcessing = async (

// Proactive mode: Capture screenshot/snapshot after browser tool execution
if (isProactiveMode && isBrowserTool && !abortController.signal.aborted) {
console.log('Proactive mode: Capturing screenshots after browser tool call')
console.log(
'Proactive mode: Capturing screenshots after browser tool call'
)

// Filter out old screenshots before adding new ones
filterOldProactiveScreenshots(builder)

// Capture new screenshots
const proactiveScreenshots = await captureProactiveScreenshots(abortController)
const proactiveScreenshots =
await captureProactiveScreenshots(abortController)

// Add proactive screenshots to builder
for (const screenshot of proactiveScreenshots) {

@@ -722,12 +733,8 @@ export const postMessageProcessing = async (
// update message metadata
}

if (
thread &&
provider &&
!abortController.signal.aborted &&
toolStepCounter < maxToolSteps
) {
// Process follow-up completion if conditions are met
if (thread && provider && !abortController.signal.aborted) {
try {
const messagesWithToolResults = builder.getMessages()

@@ -750,6 +757,7 @@ export const postMessageProcessing = async (
)

if (isCompletionResponse(followUpCompletion)) {
// Handle non-streaming response
const choice = followUpCompletion.choices[0]
const content = choice?.message?.content
if (content) followUpText = content as string

@@ -759,6 +767,7 @@ export const postMessageProcessing = async (
if (textContent?.text) textContent.text.value += followUpText
if (updateStreamingUI) updateStreamingUI({ ...message })
} else {
// Handle streaming response
const reasoningProcessor = new ReasoningProcessor()
for await (const chunk of followUpCompletion) {
if (abortController.signal.aborted) break

@@ -772,9 +781,9 @@ export const postMessageProcessing = async (
if (deltaContent) {
textContent.text.value += deltaContent
followUpText += deltaContent
console.log(`delta content from followup:\n${deltaContent}`)
}
}

if (deltaReasoning) {
streamEvents.push({
timestamp: Date.now(),

@@ -782,6 +791,7 @@ export const postMessageProcessing = async (
data: { content: deltaReasoning },
})
}

const initialToolCallCount = newToolCalls.length

if (chunk.choices[0]?.delta?.tool_calls) {

@@ -795,6 +805,7 @@ export const postMessageProcessing = async (
})
}
}

// Ensure the metadata is updated before calling updateStreamingUI
message.metadata = {
...(message.metadata ?? {}),

@@ -802,26 +813,27 @@ export const postMessageProcessing = async (
}

if (updateStreamingUI) {
// FIX: Create a new object reference for the content array
// Create a new object reference for the content array
// This forces the memoized component to detect the change in the mutated text
const uiMessage: ThreadMessage = {
...message,
content: message.content.map((c) => ({ ...c })), // Shallow copy array and its parts
content: message.content.map((c) => ({ ...c })),
}
updateStreamingUI(uiMessage)
}
}

if (textContent?.text && updateStreamingUI) {
// FIX: Create a new object reference for the content array
// This forces the memoized component to detect the change in the mutated text
// Final UI update after streaming completes
const uiMessage: ThreadMessage = {
...message,
content: message.content.map((c) => ({ ...c })), // Shallow copy array and its parts
content: message.content.map((c) => ({ ...c })),
}
updateStreamingUI(uiMessage)
}
}

// Recursively process new tool calls if any
if (newToolCalls.length > 0) {
builder.addAssistantMessage(followUpText, undefined, newToolCalls)
await postMessageProcessing(

@@ -837,6 +849,7 @@ export const postMessageProcessing = async (
tools,
updateStreamingUI,
maxToolSteps,
nextStepCount, // Pass the incremented step count
isProactiveMode
)
}

@@ -846,11 +859,23 @@ export const postMessageProcessing = async (
'Failed to get follow-up completion after tool execution:',
String(error)
)
// Optionally add error to message metadata for UI display
const streamEvents = (message.metadata?.streamEvents || []) as any[]
streamEvents.push({
timestamp: Date.now(),
type: 'error',
data: {
message: 'Follow-up completion failed',
error: String(error),
},
})
message.metadata = {
...(message.metadata ?? {}),
streamEvents: streamEvents,
}
}
}
}

// Reset counter when the chain is fully resolved
toolStepCounter = 0
return message
}