feat: Proactively take screenshot and snapshot for every browser tool call
This commit is contained in:
parent
c773abb688
commit
e9f469b623
@ -16,6 +16,7 @@ import {
|
|||||||
newUserThreadContent,
|
newUserThreadContent,
|
||||||
postMessageProcessing,
|
postMessageProcessing,
|
||||||
sendCompletion,
|
sendCompletion,
|
||||||
|
captureProactiveScreenshots,
|
||||||
} from '@/lib/completion'
|
} from '@/lib/completion'
|
||||||
import { CompletionMessagesBuilder } from '@/lib/messages'
|
import { CompletionMessagesBuilder } from '@/lib/messages'
|
||||||
import { renderInstructions } from '@/lib/instructionTemplate'
|
import { renderInstructions } from '@/lib/instructionTemplate'
|
||||||
@ -419,6 +420,27 @@ export const useChat = () => {
|
|||||||
})
|
})
|
||||||
: []
|
: []
|
||||||
|
|
||||||
|
// Check if proactive mode is enabled
|
||||||
|
const isProactiveMode = selectedModel?.capabilities?.includes('proactive') ?? false
|
||||||
|
|
||||||
|
// Proactive mode: Capture initial screenshot/snapshot before first LLM call
|
||||||
|
if (isProactiveMode && availableTools.length > 0 && !abortController.signal.aborted) {
|
||||||
|
console.log('Proactive mode: Capturing initial screenshots before LLM call')
|
||||||
|
try {
|
||||||
|
const initialScreenshots = await captureProactiveScreenshots(abortController)
|
||||||
|
|
||||||
|
// Add initial screenshots to builder
|
||||||
|
for (const screenshot of initialScreenshots) {
|
||||||
|
// Generate unique tool call ID for initial screenshot
|
||||||
|
const proactiveToolCallId = `proactive_initial_${Date.now()}_${Math.random()}`
|
||||||
|
builder.addToolMessage(screenshot, proactiveToolCallId)
|
||||||
|
console.log('Initial proactive screenshot added to context')
|
||||||
|
}
|
||||||
|
} catch (e) {
|
||||||
|
console.warn('Failed to capture initial proactive screenshots:', e)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
let assistantLoopSteps = 0
|
let assistantLoopSteps = 0
|
||||||
|
|
||||||
while (
|
while (
|
||||||
@ -694,6 +716,10 @@ export const useChat = () => {
|
|||||||
)
|
)
|
||||||
|
|
||||||
builder.addAssistantMessage(accumulatedText, undefined, toolCalls)
|
builder.addAssistantMessage(accumulatedText, undefined, toolCalls)
|
||||||
|
|
||||||
|
// Check if proactive mode is enabled for this model
|
||||||
|
const isProactiveMode = selectedModel?.capabilities?.includes('proactive') ?? false
|
||||||
|
|
||||||
const updatedMessage = await postMessageProcessing(
|
const updatedMessage = await postMessageProcessing(
|
||||||
toolCalls,
|
toolCalls,
|
||||||
builder,
|
builder,
|
||||||
@ -701,7 +727,8 @@ export const useChat = () => {
|
|||||||
abortController,
|
abortController,
|
||||||
useToolApproval.getState().approvedTools,
|
useToolApproval.getState().approvedTools,
|
||||||
allowAllMCPPermissions ? undefined : showApprovalModal,
|
allowAllMCPPermissions ? undefined : showApprovalModal,
|
||||||
allowAllMCPPermissions
|
allowAllMCPPermissions,
|
||||||
|
isProactiveMode
|
||||||
)
|
)
|
||||||
addMessage(updatedMessage ?? finalContent)
|
addMessage(updatedMessage ?? finalContent)
|
||||||
updateStreamingContent(emptyThreadContent)
|
updateStreamingContent(emptyThreadContent)
|
||||||
|
|||||||
@ -378,6 +378,119 @@ export const extractToolCall = (
|
|||||||
return calls
|
return calls
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Reports whether a tool name looks like a browser MCP tool.
 *
 * The check is a case-insensitive prefix match against a fixed list of
 * known prefixes.
 *
 * @param toolName - The name of the tool
 * @returns true if the lower-cased name starts with one of the browser prefixes
 */
const isBrowserMCPTool = (toolName: string): boolean => {
  // NOTE: 'browserbase' and 'browsermcp' are already matched by 'browser';
  // they are kept so the list stays explicit.
  const browserToolPrefixes = [
    'browser',
    'browserbase',
    'browsermcp',
    'multi_browserbase',
  ]
  // Lower-case once instead of once per prefix.
  const normalizedName = toolName.toLowerCase()
  return browserToolPrefixes.some((prefix) => normalizedName.startsWith(prefix))
}
|
||||||
|
|
||||||
|
/**
 * Best-effort capture of a screenshot and a snapshot through MCP tools.
 *
 * Picks the first MCP tool whose name contains "screenshot" and the first
 * whose name contains "snapshot" (case-insensitive), calls each with empty
 * arguments (screenshot first, then snapshot) and collects the results.
 * Every failure is logged and skipped, so this function never rejects.
 *
 * @param abortController - Checked before each tool call; once aborted, the
 *   remaining captures are skipped. A call that is already in flight is
 *   awaited, not cancelled.
 * @returns The truthy, non-string tool results that were captured (may be empty)
 */
export const captureProactiveScreenshots = async (
  abortController: AbortController
): Promise<ToolResult[]> => {
  const results: ToolResult[] = []

  try {
    // Get available tools
    const allTools = await getServiceHub().mcp().getTools()

    // Resolve both tools up front; order fixes the order of the results.
    const captureTargets = ['screenshot', 'snapshot'].map((kind) => ({
      kind,
      tool: allTools.find((t) => t.name.toLowerCase().includes(kind)),
    }))

    for (const { kind, tool } of captureTargets) {
      // Skip when no matching tool exists or the run was aborted meanwhile
      if (!tool || abortController.signal.aborted) continue

      try {
        const { promise } = getServiceHub().mcp().callToolWithCancellation({
          toolName: tool.name,
          arguments: {},
        })
        const captureResult = await promise
        // Empty and plain-string results are discarded
        if (captureResult && typeof captureResult !== 'string') {
          results.push(captureResult as ToolResult)
        }
      } catch (e) {
        // One failed capture must not prevent the other
        console.warn(`Failed to capture proactive ${kind}:`, e)
      }
    }
  } catch (e) {
    console.error('Failed to get MCP tools for proactive capture:', e)
  }

  return results
}
|
||||||
|
|
||||||
|
/**
 * Strips image parts from every tool message currently held by the builder.
 *
 * Meant to run before fresh captures are appended so that earlier images do
 * not pile up in the context. Note that it removes images from ALL existing
 * tool messages; nothing is exempted as "latest".
 *
 * - Non-tool messages and tool messages with string content are kept as-is.
 * - Tool messages with array content keep only their non-`image_url` parts.
 * - Tool messages that contained nothing but images are kept with a short
 *   text placeholder instead of being dropped, so a `tool_call_id` that an
 *   assistant message refers to never loses its reply.
 *
 * @param builder - The completion messages builder whose message list is replaced
 */
const filterOldProactiveScreenshots = (builder: CompletionMessagesBuilder) => {
  const removedImagePlaceholder = '[image removed]'

  const filteredMessages = builder.getMessages().map((msg) => {
    // Only multimodal (array-content) tool messages can carry images
    if (msg.role !== 'tool' || !Array.isArray(msg.content)) {
      return msg
    }

    const textOnly = msg.content.filter(
      (part: any) => part.type !== 'image_url'
    )

    return {
      ...msg,
      content:
        textOnly.length > 0
          ? textOnly
          : [{ type: 'text', text: removedImagePlaceholder }],
    }
  })

  // Workaround: CompletionMessagesBuilder doesn't have a setter, so the
  // private messages array is overwritten directly.
  ;(builder as any).messages = filteredMessages
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* @fileoverview Helper function to process the completion response.
|
* @fileoverview Helper function to process the completion response.
|
||||||
* @param calls
|
* @param calls
|
||||||
@ -387,6 +500,7 @@ export const extractToolCall = (
|
|||||||
* @param approvedTools
|
* @param approvedTools
|
||||||
* @param showModal
|
* @param showModal
|
||||||
* @param allowAllMCPPermissions
|
* @param allowAllMCPPermissions
|
||||||
|
* @param isProactiveMode
|
||||||
*/
|
*/
|
||||||
export const postMessageProcessing = async (
|
export const postMessageProcessing = async (
|
||||||
calls: ChatCompletionMessageToolCall[],
|
calls: ChatCompletionMessageToolCall[],
|
||||||
@ -399,10 +513,13 @@ export const postMessageProcessing = async (
|
|||||||
threadId: string,
|
threadId: string,
|
||||||
toolParameters?: object
|
toolParameters?: object
|
||||||
) => Promise<boolean>,
|
) => Promise<boolean>,
|
||||||
allowAllMCPPermissions: boolean = false
|
allowAllMCPPermissions: boolean = false,
|
||||||
|
isProactiveMode: boolean = false
|
||||||
) => {
|
) => {
|
||||||
// Handle completed tool calls
|
// Handle completed tool calls
|
||||||
if (calls.length) {
|
if (calls.length) {
|
||||||
|
// Track if any browser MCP tool was called
|
||||||
|
let hasBrowserMCPToolCall = false
|
||||||
// Fetch RAG tool names from RAG service
|
// Fetch RAG tool names from RAG service
|
||||||
let ragToolNames = new Set<string>()
|
let ragToolNames = new Set<string>()
|
||||||
try {
|
try {
|
||||||
@ -455,6 +572,7 @@ export const postMessageProcessing = async (
|
|||||||
const toolName = toolCall.function.name
|
const toolName = toolCall.function.name
|
||||||
const toolArgs = toolCall.function.arguments.length ? toolParameters : {}
|
const toolArgs = toolCall.function.arguments.length ? toolParameters : {}
|
||||||
const isRagTool = ragToolNames.has(toolName)
|
const isRagTool = ragToolNames.has(toolName)
|
||||||
|
const isBrowserTool = isBrowserMCPTool(toolName)
|
||||||
|
|
||||||
// Auto-approve RAG tools (local/safe operations), require permission for MCP tools
|
// Auto-approve RAG tools (local/safe operations), require permission for MCP tools
|
||||||
const approved = isRagTool
|
const approved = isRagTool
|
||||||
@ -544,6 +662,32 @@ export const postMessageProcessing = async (
|
|||||||
],
|
],
|
||||||
}
|
}
|
||||||
builder.addToolMessage(result as ToolResult, toolCall.id)
|
builder.addToolMessage(result as ToolResult, toolCall.id)
|
||||||
|
|
||||||
|
// Mark if we used a browser tool (for proactive mode)
|
||||||
|
if (isBrowserTool) {
|
||||||
|
hasBrowserMCPToolCall = true
|
||||||
|
}
|
||||||
|
|
||||||
|
// Proactive mode: Capture screenshot/snapshot after browser tool execution
|
||||||
|
if (isProactiveMode && isBrowserTool && !abortController.signal.aborted) {
|
||||||
|
console.log('Proactive mode: Capturing screenshots after browser tool call')
|
||||||
|
|
||||||
|
// Filter out old screenshots before adding new ones
|
||||||
|
filterOldProactiveScreenshots(builder)
|
||||||
|
|
||||||
|
// Capture new screenshots
|
||||||
|
const proactiveScreenshots = await captureProactiveScreenshots(abortController)
|
||||||
|
|
||||||
|
// Add proactive screenshots to builder
|
||||||
|
for (const screenshot of proactiveScreenshots) {
|
||||||
|
// Generate a unique tool call ID for the proactive screenshot
|
||||||
|
const proactiveToolCallId = ulid()
|
||||||
|
builder.addToolMessage(screenshot, proactiveToolCallId)
|
||||||
|
|
||||||
|
console.log('Proactive screenshot captured and added to context')
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
// update message metadata
|
// update message metadata
|
||||||
}
|
}
|
||||||
return message
|
return message
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user